klee
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ModuleUtil.cpp
Go to the documentation of this file.
1 //===-- ModuleUtil.cpp ----------------------------------------------------===//
2 //
3 // The KLEE Symbolic Virtual Machine
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
11 #include "klee/Config/Version.h"
12 // FIXME: This does not belong here.
13 #include "../Core/Common.h"
14 #include "../Core/SpecialFunctionHandler.h"
15 
16 #if LLVM_VERSION_CODE >= LLVM_VERSION(3, 4)
17 #include "llvm/IR/LLVMContext.h"
18 #endif
19 
20 #if LLVM_VERSION_CODE >= LLVM_VERSION(3, 3)
21 #include "llvm/Bitcode/ReaderWriter.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/Instructions.h"
24 #include "llvm/IR/IntrinsicInst.h"
25 #include "llvm/IRReader/IRReader.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/Object/Archive.h"
28 #include "llvm/Object/ObjectFile.h"
29 #include "llvm/Object/Error.h"
30 #include "llvm/Support/FileSystem.h"
31 #include "llvm/IR/ValueSymbolTable.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/DataStream.h"
34 #else
35 #include "llvm/Function.h"
36 #include "llvm/Instructions.h"
37 #include "llvm/IntrinsicInst.h"
38 #include "llvm/Module.h"
39 #endif
40 
41 #include "llvm/Linker.h"
42 #include "llvm/Assembly/AssemblyAnnotationWriter.h"
43 #include "llvm/Support/CFG.h"
44 #include "llvm/Support/CallSite.h"
45 #include "llvm/Support/Debug.h"
46 #include "llvm/Support/InstIterator.h"
47 #include "llvm/Support/raw_ostream.h"
48 #include "llvm/Analysis/ValueTracking.h"
49 #include "llvm/Support/Path.h"
50 
51 #include <map>
52 #include <set>
53 #include <fstream>
54 #include <sstream>
55 #include <string>
56 
57 using namespace llvm;
58 using namespace klee;
59 
60 #if LLVM_VERSION_CODE >= LLVM_VERSION(3, 3)
61 static void
76 GetAllUndefinedSymbols(Module *M, std::set<std::string> &UndefinedSymbols) {
77  static const std::string llvmIntrinsicPrefix="llvm.";
78  std::set<std::string> DefinedSymbols;
79  UndefinedSymbols.clear();
80  DEBUG_WITH_TYPE("klee_linker", dbgs() << "*** Computing undefined symbols ***\n");
81 
82  for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
83  if (I->hasName()) {
84  if (I->isDeclaration())
85  UndefinedSymbols.insert(I->getName());
86  else if (!I->hasLocalLinkage()) {
87  assert(!I->hasDLLImportLinkage()
88  && "Found dllimported non-external symbol!");
89  DefinedSymbols.insert(I->getName());
90  }
91  }
92 
93  for (Module::global_iterator I = M->global_begin(), E = M->global_end();
94  I != E; ++I)
95  if (I->hasName()) {
96  if (I->isDeclaration())
97  UndefinedSymbols.insert(I->getName());
98  else if (!I->hasLocalLinkage()) {
99  assert(!I->hasDLLImportLinkage()
100  && "Found dllimported non-external symbol!");
101  DefinedSymbols.insert(I->getName());
102  }
103  }
104 
105  for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
106  I != E; ++I)
107  if (I->hasName())
108  DefinedSymbols.insert(I->getName());
109 
110 
111  // Prune out any defined symbols from the undefined symbols set
112  // and other symbols we don't want to treat as an undefined symbol
113  std::vector<std::string> SymbolsToRemove;
114  for (std::set<std::string>::iterator I = UndefinedSymbols.begin();
115  I != UndefinedSymbols.end(); ++I )
116  {
117  if (DefinedSymbols.count(*I))
118  {
119  SymbolsToRemove.push_back(*I);
120  continue;
121  }
122 
123  // Strip out llvm intrinsics
124  if ( (I->size() >= llvmIntrinsicPrefix.size() ) &&
125  (I->compare(0, llvmIntrinsicPrefix.size(), llvmIntrinsicPrefix) == 0) )
126  {
127  DEBUG_WITH_TYPE("klee_linker", dbgs() << "LLVM intrinsic " << *I <<
128  " has will be removed from undefined symbols"<< "\n");
129  SymbolsToRemove.push_back(*I);
130  continue;
131  }
132 
133  // Symbol really is undefined
134  DEBUG_WITH_TYPE("klee_linker", dbgs() << "Symbol " << *I << " is undefined.\n");
135  }
136 
137  // Remove KLEE intrinsics from set of undefined symbols
138  for (SpecialFunctionHandler::const_iterator sf = SpecialFunctionHandler::begin(),
139  se = SpecialFunctionHandler::end(); sf != se; ++sf)
140  {
141  if (UndefinedSymbols.find(sf->name) == UndefinedSymbols.end())
142  continue;
143 
144  SymbolsToRemove.push_back(sf->name);
145  DEBUG_WITH_TYPE("klee_linker", dbgs() << "KLEE intrinsic " << sf->name <<
146  " has will be removed from undefined symbols"<< "\n");
147  }
148 
149  // Now remove the symbols from undefined set.
150  for (size_t i = 0, j = SymbolsToRemove.size(); i < j; ++i )
151  UndefinedSymbols.erase(SymbolsToRemove[i]);
152 
153  DEBUG_WITH_TYPE("klee_linker", dbgs() << "*** Finished computing undefined symbols ***\n");
154 }
155 
156 
160 static void CleanUpLinkBCA(std::vector<Module*> &archiveModules)
161 {
162  for (std::vector<Module*>::iterator I = archiveModules.begin(), E = archiveModules.end();
163  I != E; ++I)
164  {
165  delete (*I);
166  }
167 }
168 
178 static bool linkBCA(object::Archive* archive, Module* composite, std::string& errorMessage)
179 {
180  llvm::raw_string_ostream SS(errorMessage);
181  std::vector<Module*> archiveModules;
182 
183  // Is this efficient? Could we use StringRef instead?
184  std::set<std::string> undefinedSymbols;
185  GetAllUndefinedSymbols(composite, undefinedSymbols);
186 
187  if (undefinedSymbols.size() == 0)
188  {
189  // Nothing to do
190  DEBUG_WITH_TYPE("klee_linker", dbgs() << "No undefined symbols. Not linking anything in!\n");
191  return true;
192  }
193 
194  DEBUG_WITH_TYPE("klee_linker", dbgs() << "Loading modules\n");
195  // Load all bitcode files in to memory so we can examine their symbols
196  for (object::Archive::child_iterator AI = archive->begin_children(),
197  AE = archive->end_children(); AI != AE; ++AI)
198  {
199 
200  StringRef memberName;
201  error_code ec = AI->getName(memberName);
202 
203  if ( ec == errc::success )
204  {
205  DEBUG_WITH_TYPE("klee_linker", dbgs() << "Loading archive member " << memberName << "\n");
206  }
207  else
208  {
209  errorMessage="Archive member does not have a name!\n";
210  return false;
211  }
212 
213  OwningPtr<object::Binary> child;
214  ec = AI->getAsBinary(child);
215  if (ec != object::object_error::success)
216  {
217  // If we can't open as a binary object file its hopefully a bitcode file
218 
219  OwningPtr<MemoryBuffer> buff; // Once this is destroyed will Module still be valid??
220  Module *Result = 0;
221 
222  if (error_code ec = AI->getMemoryBuffer(buff))
223  {
224  SS << "Failed to get MemoryBuffer: " <<ec.message();
225  SS.flush();
226  return false;
227  }
228 
229  if (buff)
230  {
231  // FIXME: Maybe load bitcode file lazily? Then if we need to link, materialise the module
232  Result = ParseBitcodeFile(buff.get(), getGlobalContext(), &errorMessage);
233 
234  if(!Result)
235  {
236  SS << "Loading module failed : " << errorMessage << "\n";
237  SS.flush();
238  return false;
239  }
240  archiveModules.push_back(Result);
241  }
242  else
243  {
244  errorMessage="Buffer was NULL!";
245  return false;
246  }
247 
248  }
249  else if (object::ObjectFile *o = dyn_cast<object::ObjectFile>(child.get()))
250  {
251  SS << "Object file " << o->getFileName().data() <<
252  " in archive is not supported";
253  SS.flush();
254  return false;
255  }
256  else
257  {
258  SS << "Loading archive child with error "<< ec.message();
259  SS.flush();
260  return false;
261  }
262 
263  }
264 
265  DEBUG_WITH_TYPE("klee_linker", dbgs() << "Loaded " << archiveModules.size() << " modules\n");
266 
267 
268  std::set<std::string> previouslyUndefinedSymbols;
269 
270  // Walk through the modules looking for definitions of undefined symbols
271  // if we find a match we should link that module in.
272  unsigned int passCounter=0;
273  do
274  {
275  unsigned int modulesLoadedOnPass=0;
276  previouslyUndefinedSymbols = undefinedSymbols;
277 
278  for (size_t i = 0, j = archiveModules.size(); i < j; ++i)
279  {
280  // skip empty archives
281  if (archiveModules[i] == 0)
282  continue;
283  Module * M = archiveModules[i];
284  // Look for the undefined symbols in the composite module
285  for (std::set<std::string>::iterator S = undefinedSymbols.begin(), SE = undefinedSymbols.end();
286  S != SE; ++S)
287  {
288 
289  // FIXME: We aren't handling weak symbols here!
290  // However the algorithm used in LLVM3.2 didn't seem to either
291  // so maybe it doesn't matter?
292 
293  if ( GlobalValue* GV = dyn_cast_or_null<GlobalValue>(M->getValueSymbolTable().lookup(*S)))
294  {
295  if (GV->isDeclaration()) continue; // Not a definition
296 
297  DEBUG_WITH_TYPE("klee_linker", dbgs() << "Found " << GV->getName() <<
298  " in " << M->getModuleIdentifier() << "\n");
299 
300 
301  if (Linker::LinkModules(composite, M, Linker::DestroySource, &errorMessage))
302  {
303  // Linking failed
304  SS << "Linking archive module with composite failed:" << errorMessage;
305  SS.flush();
306  CleanUpLinkBCA(archiveModules);
307  return false;
308  }
309  else
310  {
311  // Link succeed, now clean up
312  modulesLoadedOnPass++;
313  DEBUG_WITH_TYPE("klee_linker", dbgs() << "Linking succeeded.\n");
314 
315  delete M;
316  archiveModules[i] = 0;
317 
318  // We need to recompute the undefined symbols in the composite module
319  // after linking
320  GetAllUndefinedSymbols(composite, undefinedSymbols);
321 
322  break; // Look for symbols in next module
323  }
324 
325  }
326  }
327  }
328 
329  passCounter++;
330  DEBUG_WITH_TYPE("klee_linker", dbgs() << "Completed " << passCounter <<
331  " linker passes.\n" << modulesLoadedOnPass <<
332  " modules loaded on the last pass\n");
333  } while (undefinedSymbols != previouslyUndefinedSymbols); // Iterate until we reach a fixed point
334 
335 
336  // What's left in archiveModules we don't want to link in so free it
337  CleanUpLinkBCA(archiveModules);
338 
339  return true;
340 
341 }
342 #endif
343 
344 
345 Module *klee::linkWithLibrary(Module *module,
346  const std::string &libraryName) {
347 DEBUG_WITH_TYPE("klee_linker", dbgs() << "Linking file " << libraryName << "\n");
348 #if LLVM_VERSION_CODE >= LLVM_VERSION(3, 3)
349  if (!sys::fs::exists(libraryName)) {
350  klee_error("Link with library %s failed. No such file.",
351  libraryName.c_str());
352  }
353 
354  OwningPtr<MemoryBuffer> Buffer;
355  if (error_code ec = MemoryBuffer::getFile(libraryName,Buffer)) {
356  klee_error("Link with library %s failed: %s", libraryName.c_str(),
357  ec.message().c_str());
358  }
359 
360  sys::fs::file_magic magic = sys::fs::identify_magic(Buffer->getBuffer());
361 
362  LLVMContext &Context = getGlobalContext();
363  std::string ErrorMessage;
364 
365  if (magic == sys::fs::file_magic::bitcode) {
366  Module *Result = 0;
367  Result = ParseBitcodeFile(Buffer.get(), Context, &ErrorMessage);
368 
369 
370  if (!Result || Linker::LinkModules(module, Result, Linker::DestroySource,
371  &ErrorMessage))
372  klee_error("Link with library %s failed: %s", libraryName.c_str(),
373  ErrorMessage.c_str());
374 
375  delete Result;
376 
377  } else if (magic == sys::fs::file_magic::archive) {
378  OwningPtr<object::Binary> arch;
379  if (error_code ec = object::createBinary(Buffer.take(), arch))
380  klee_error("Link with library %s failed: %s", libraryName.c_str(),
381  ec.message().c_str());
382 
383  if (object::Archive *a = dyn_cast<object::Archive>(arch.get())) {
384  // Handle in helper
385  if (!linkBCA(a, module, ErrorMessage))
386  klee_error("Link with library %s failed: %s", libraryName.c_str(),
387  ErrorMessage.c_str());
388  }
389  else {
390  klee_error("Link with library %s failed: Cast to archive failed", libraryName.c_str());
391  }
392 
393  } else if (magic.is_object()) {
394  OwningPtr<object::Binary> obj;
395  if (object::ObjectFile *o = dyn_cast<object::ObjectFile>(obj.get())) {
396  klee_warning("Link with library: Object file %s in archive %s found. "
397  "Currently not supported.",
398  o->getFileName().data(), libraryName.c_str());
399  }
400  } else {
401  klee_error("Link with library %s failed: Unrecognized file type.",
402  libraryName.c_str());
403  }
404 
405  return module;
406 #else
407  Linker linker("klee", module, false);
408 
409  llvm::sys::Path libraryPath(libraryName);
410  bool native = false;
411 
412  if (linker.LinkInFile(libraryPath, native)) {
413  klee_error("Linking library %s failed", libraryName.c_str());
414  }
415 
416  return linker.releaseModule();
417 #endif
418 }
419 
420 
421 
422 Function *klee::getDirectCallTarget(CallSite cs) {
423  Value *v = cs.getCalledValue();
424  if (Function *f = dyn_cast<Function>(v)) {
425  return f;
426  } else if (llvm::ConstantExpr *ce = dyn_cast<llvm::ConstantExpr>(v)) {
427  if (ce->getOpcode()==Instruction::BitCast)
428  if (Function *f = dyn_cast<Function>(ce->getOperand(0)))
429  return f;
430 
431  // NOTE: This assert may fire, it isn't necessarily a problem and
432  // can be disabled, I just wanted to know when and if it happened.
433  assert(0 && "FIXME: Unresolved direct target for a constant expression.");
434  }
435 
436  return 0;
437 }
438 
439 static bool valueIsOnlyCalled(const Value *v) {
440  for (Value::const_use_iterator it = v->use_begin(), ie = v->use_end();
441  it != ie; ++it) {
442  if (const Instruction *instr = dyn_cast<Instruction>(*it)) {
443  if (instr->getOpcode()==0) continue; // XXX function numbering inst
444  if (!isa<CallInst>(instr) && !isa<InvokeInst>(instr)) return false;
445 
446  // Make sure that the value is only the target of this call and
447  // not an argument.
448  for (unsigned i=1,e=instr->getNumOperands(); i!=e; ++i)
449  if (instr->getOperand(i)==v)
450  return false;
451  } else if (const llvm::ConstantExpr *ce =
452  dyn_cast<llvm::ConstantExpr>(*it)) {
453  if (ce->getOpcode()==Instruction::BitCast)
454  if (valueIsOnlyCalled(ce))
455  continue;
456  return false;
457  } else if (const GlobalAlias *ga = dyn_cast<GlobalAlias>(*it)) {
458  // XXX what about v is bitcast of aliasee?
459  if (v==ga->getAliasee() && !valueIsOnlyCalled(ga))
460  return false;
461  } else {
462  return false;
463  }
464  }
465 
466  return true;
467 }
468 
469 bool klee::functionEscapes(const Function *f) {
470  return !valueIsOnlyCalled(f);
471 }
static void GetAllUndefinedSymbols(Module *M, std::set< std::string > &UndefinedSymbols)
Definition: ModuleUtil.cpp:76
void klee_error(const char *msg,...) __attribute__((format(printf
Definition: Common.cpp:73
static bool linkBCA(object::Archive *archive, Module *composite, std::string &errorMessage)
Definition: ModuleUtil.cpp:178
bool functionEscapes(const llvm::Function *f)
void klee_warning(char *name)
Definition: klee-replay.c:390
llvm::Module * linkWithLibrary(llvm::Module *module, const std::string &libraryName)
Link a module with a specified bitcode archive.
llvm::Function * getDirectCallTarget(llvm::CallSite)
Context - Helper class for storing global information about a KLEE run.
Definition: Context.h:18
static void CleanUpLinkBCA(std::vector< Module * > &archiveModules)
Definition: ModuleUtil.cpp:160
static bool valueIsOnlyCalled(const Value *v)
Definition: ModuleUtil.cpp:439