From 5d8bb5c7c5412fa6f54d5c9d6cf1878ed772a240 Mon Sep 17 00:00:00 2001
From: Filipe Cabecinhas <me@filcab.net>
Date: Sat, 24 Jan 2015 04:15:05 +0000
Subject: [PATCH] [Bitcode] Diagnose errors instead of asserting from bad input

Eventually we can make some of these pass the error along to the caller.

Report a fatal error when:
  * an abbreviation record is invalid (it starts with an Array or a Blob)
  * an abbreviation number that is out of range is requested
  * the current word cannot be filled because the end of the file was reached

Updated the existing invalid-bitcode test (invalid-pr20485.bc) to check its
error output with FileCheck.

Bugs found with afl-fuzz

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226986 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Bitcode/BitstreamReader.h           |   9 +++++----
 lib/Bitcode/Reader/BitstreamReader.cpp           |   6 +++++-
 test/Bitcode/Inputs/invalid-abbrev.bc            | Bin 0 -> 129 bytes
 test/Bitcode/Inputs/invalid-bad-abbrev-number.bc |   1 +
 test/Bitcode/Inputs/invalid-unexpected-eof.bc    |   1 +
 test/Bitcode/invalid.test                        |  14 +++++++++++++-
 6 files changed, 25 insertions(+), 6 deletions(-)
 create mode 100644 test/Bitcode/Inputs/invalid-abbrev.bc
 create mode 100644 test/Bitcode/Inputs/invalid-bad-abbrev-number.bc
 create mode 100644 test/Bitcode/Inputs/invalid-unexpected-eof.bc

diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h
index 865a3e66842..bc3e48a4341 100644
--- a/include/llvm/Bitcode/BitstreamReader.h
+++ b/include/llvm/Bitcode/BitstreamReader.h
@@ -315,7 +315,8 @@ public:
   }
 
   void fillCurWord() {
-    assert(Size == 0 || NextChar < (unsigned)Size);
+    if (Size != 0 && NextChar >= (unsigned)Size)
+      report_fatal_error("Unexpected end of file");
 
     // Read the next word from the stream.
     uint8_t Array[sizeof(word_t)] = {0};
@@ -490,11 +491,11 @@ private:
   //===--------------------------------------------------------------------===//
 
 public:
-
   /// Return the abbreviation for the specified AbbrevId.
   const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
-    unsigned AbbrevNo = AbbrevID-bitc::FIRST_APPLICATION_ABBREV;
-    assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
+    unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV;
+    if (AbbrevNo >= CurAbbrevs.size())
+      report_fatal_error("Invalid abbrev number");
     return CurAbbrevs[AbbrevNo].get();
   }
 
diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp
index 5e3232e5313..9d5fab9147a 100644
--- a/lib/Bitcode/Reader/BitstreamReader.cpp
+++ b/lib/Bitcode/Reader/BitstreamReader.cpp
@@ -170,8 +170,12 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
   unsigned Code;
   if (CodeOp.isLiteral())
     Code = CodeOp.getLiteralValue();
-  else
+  else {
+    if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
+        CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
+      report_fatal_error("Abbreviation starts with an Array or a Blob");
     Code = readAbbreviatedField(*this, CodeOp);
+  }
 
   for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
     const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
diff --git a/test/Bitcode/Inputs/invalid-abbrev.bc b/test/Bitcode/Inputs/invalid-abbrev.bc
new file mode 100644
index 0000000000000000000000000000000000000000..4e8f3944b84b9b1e6d8e604f89ecefe9dee4503e
GIT binary patch
literal 129
zcmZ>AK5$Qw#{dktn-svb5Ca25fC^+M#7F}pB@Rx7l-3l3B`qw%N(L<x3=DiadJH7a
F006X(925Wm

literal 0
HcmV?d00001

diff --git a/test/Bitcode/Inputs/invalid-bad-abbrev-number.bc b/test/Bitcode/Inputs/invalid-bad-abbrev-number.bc
new file mode 100644
index 00000000000..e4e1fb3e4b3
--- /dev/null
+++ b/test/Bitcode/Inputs/invalid-bad-abbrev-number.bc
@@ -0,0 +1 @@
+BC��!0000000000
\ No newline at end of file
diff --git a/test/Bitcode/Inputs/invalid-unexpected-eof.bc b/test/Bitcode/Inputs/invalid-unexpected-eof.bc
new file mode 100644
index 00000000000..a487393d066
--- /dev/null
+++ b/test/Bitcode/Inputs/invalid-unexpected-eof.bc
@@ -0,0 +1 @@
+BC��!00000000000000000000
\ No newline at end of file
diff --git a/test/Bitcode/invalid.test b/test/Bitcode/invalid.test
index 5f88da45f6a..19ef3267cc6 100644
--- a/test/Bitcode/invalid.test
+++ b/test/Bitcode/invalid.test
@@ -1 +1,13 @@
-RUN: not llvm-dis -disable-output %p/Inputs/invalid-pr20485.bc
+RUN: not llvm-dis -disable-output %p/Inputs/invalid-pr20485.bc 2>&1 | \
+RUN:   FileCheck --check-prefix=INVALID-ENCODING %s
+RUN: not llvm-dis -disable-output %p/Inputs/invalid-abbrev.bc 2>&1 | \
+RUN:   FileCheck --check-prefix=BAD-ABBREV %s
+RUN: not llvm-dis -disable-output %p/Inputs/invalid-unexpected-eof.bc 2>&1 | \
+RUN:   FileCheck --check-prefix=UNEXPECTED-EOF %s
+RUN: not llvm-dis -disable-output %p/Inputs/invalid-bad-abbrev-number.bc 2>&1 | \
+RUN:   FileCheck --check-prefix=BAD-ABBREV-NUMBER %s
+
+INVALID-ENCODING: Invalid encoding
+BAD-ABBREV: Abbreviation starts with an Array or a Blob
+UNEXPECTED-EOF: Unexpected end of file
+BAD-ABBREV-NUMBER: Invalid abbrev number