-
Notifications
You must be signed in to change notification settings - Fork 855
Expand file tree
/
Copy pathfuzzing.h
More file actions
603 lines (496 loc) · 19.5 KB
/
fuzzing.h
File metadata and controls
603 lines (496 loc) · 19.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
/*
* Copyright 2017 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//
// Translate a binary stream of bytes into a valid wasm module, *somehow*.
// This is helpful for fuzzing.
//
#include "ir/branch-utils.h"
#include "ir/struct-utils.h"
#include "support/insert_ordered.h"
#include "tools/fuzzing/random.h"
#include <ir/eh-utils.h>
#include <ir/find_all.h>
#include <ir/literal-utils.h>
#include <ir/manipulation.h>
#include <ir/names.h>
#include <ir/public-type-validator.h>
#include <ir/utils.h>
#include <support/file.h>
#include <tools/optimization-options.h>
#include <wasm-builder.h>
namespace wasm {
// helper structs, since list initialization has a fixed order of
// evaluation, avoiding UB
struct ThreeArgs {
Expression* a;
Expression* b;
Expression* c;
};
struct UnaryArgs {
UnaryOp a;
Expression* b;
};
struct BinaryArgs {
BinaryOp a;
Expression* b;
Expression* c;
};
// params
struct FuzzParams {
// The maximum amount of params to each function.
int MAX_PARAMS;
// The maximum amount of vars in each function.
int MAX_VARS;
// The maximum number of globals in a module.
int MAX_GLOBALS;
// The maximum number of tuple elements.
int MAX_TUPLE_SIZE;
// The maximum number of struct fields.
int MAX_STRUCT_SIZE;
// The maximum number of elements in an array.
int MAX_ARRAY_SIZE;
// The number of nontrivial heap types to generate.
int MIN_HEAPTYPES;
int MAX_HEAPTYPES;
// some things require luck, try them a few times
int TRIES;
// beyond a nesting limit, greatly decrease the chance to continue to nest
int NESTING_LIMIT;
// the maximum size of a block
int BLOCK_FACTOR;
// the memory that we use, a small portion so that we have a good chance of
// looking at writes (we also look outside of this region with small
// probability) this should be a power of 2
Address USABLE_MEMORY;
// the number of runtime iterations (function calls, loop backbranches) we
// allow before we stop execution with a trap, to prevent hangs. 0 means
// no hang protection.
int HANG_LIMIT;
// the maximum amount of new GC types (structs, etc.) to create
int MAX_NEW_GC_TYPES;
// the maximum amount of catches in each try (not including a catch-all, if
// present).
int MAX_TRY_CATCHES;
FuzzParams() { setDefaults(); }
void setDefaults();
};
// main reader
class TranslateToFuzzReader {
static constexpr size_t VeryImportant = 4;
static constexpr size_t Important = 2;
public:
TranslateToFuzzReader(Module& wasm,
std::vector<char>&& input,
bool closedWorld = false);
TranslateToFuzzReader(Module& wasm,
std::string& filename,
bool closedWorld = false);
void pickPasses(OptimizationOptions& options);
void setAllowMemory(bool allowMemory_) { allowMemory = allowMemory_; }
void setAllowOOB(bool allowOOB_) { allowOOB = allowOOB_; }
void setPreserveImportsAndExports(bool preserveImportsAndExports_) {
preserveImportsAndExports = preserveImportsAndExports_;
}
void setImportedModule(std::string importedModuleName);
void build();
Module& wasm;
private:
// Whether the module will be tested in a closed-world environment.
bool closedWorld;
Builder builder;
Random random;
Intrinsics intrinsics;
// Whether to emit memory operations like loads and stores.
bool allowMemory = true;
// Whether to emit loads, stores, and call_indirects that may be out
// of bounds (which traps in wasm, and is undefined behavior in C).
bool allowOOB = true;
// Whether we preserve imports and exports. Normally we add imports (for
// logging and other useful functionality for testing), and add exports of
// functions as we create them. With this set, we add neither imports nor
// exports, which is useful if the tool using us only wants us to mutate an
// existing testcase (using initial-content).
bool preserveImportsAndExports = false;
// An optional module to import from.
std::optional<Module> importedModule;
// Whether we allow the fuzzer to add unreachable code when generating changes
// to existing code. This is randomized during startup, but could be an option
// like the above options eventually if we find that useful.
bool allowAddingUnreachableCode;
// Whether to emit atomic waits (which in single-threaded mode, may hang...)
static const bool ATOMIC_WAITS = false;
// The chance to emit a logging operation for a none expression. We
// randomize this in each function.
unsigned LOGGING_PERCENT = 0;
Name HANG_LIMIT_GLOBAL;
Name funcrefTableName;
Name exnrefTableName;
std::unordered_map<Type, Name> logImportNames;
Name hashMemoryName;
Name throwImportName;
Name tableGetImportName;
Name tableSetImportName;
Name callExportImportName;
Name callExportCatchImportName;
Name callRefImportName;
Name callRefCatchImportName;
Name sleepImportName;
Name importedGlobalModuleName = "__fuzz_import";
std::unordered_map<Type, std::vector<Name>> globalsByType;
std::unordered_map<Type, std::vector<Name>> mutableGlobalsByType;
std::unordered_map<Type, std::vector<Name>> immutableGlobalsByType;
std::unordered_map<Type, std::vector<Name>> importedImmutableGlobalsByType;
const std::vector<Type> loggableTypes;
// The heap types we can pick from to generate instructions.
std::vector<HeapType> interestingHeapTypes;
// A mapping of a heap type to the subset of interestingHeapTypes that are
// subtypes of it.
std::unordered_map<HeapType, std::vector<HeapType>> interestingHeapSubTypes;
// Type => list of struct fields that have that type.
std::unordered_map<Type, std::vector<StructField>> typeStructFields;
// Type => list of array types that have that type.
std::unordered_map<Type, std::vector<HeapType>> typeArrays;
// All struct fields that are mutable.
std::vector<StructField> mutableStructFields;
// All arrays that are mutable.
std::vector<HeapType> mutableArrays;
// Mapping of signatures to the continuations they are used by.
std::unordered_map<HeapType, std::vector<HeapType>> sigConts;
// All tags that are valid as exception tags (which cannot have results).
std::vector<Tag*> exceptionTags;
// All functions marked jsCalled.
std::vector<Name> jsCalled;
Index numAddedFunctions = 0;
// The name of an empty tag.
Name trivialTag;
// Whether we were given initial functions.
bool haveInitialFunctions;
// RAII helper for managing the state used to create a single function.
struct FunctionCreationContext {
TranslateToFuzzReader& parent;
Function* func;
std::vector<Expression*> breakableStack; // things we can break to
Index labelIndex = 0;
// a list of things relevant to computing the odds of an infinite loop,
// which we try to minimize the risk of
std::vector<Expression*> hangStack;
// type => list of locals with that type
std::unordered_map<Type, std::vector<Index>> typeLocals;
FunctionCreationContext(TranslateToFuzzReader& parent, Function* func);
~FunctionCreationContext();
// Fill in the typeLocals data structure.
void computeTypeLocals() {
typeLocals.clear();
for (Index i = 0; i < func->getNumLocals(); i++) {
typeLocals[func->getLocalType(i)].push_back(i);
}
}
};
FunctionCreationContext* funcContext = nullptr;
// The fuzzing parameters we use. This may change from function to function or
// even in a more refined manner, so we use an RAII context to manage it.
struct FuzzParamsContext : public FuzzParams {
TranslateToFuzzReader& parent;
FuzzParamsContext* old;
FuzzParamsContext(TranslateToFuzzReader& parent)
: parent(parent), old(parent.fuzzParams) {
parent.fuzzParams = this;
}
~FuzzParamsContext() { parent.fuzzParams = old; }
};
FuzzParamsContext* fuzzParams = nullptr;
// The default global context we use throughout the process (unless it is
// overridden using another context in an RAII manner).
std::unique_ptr<FuzzParamsContext> globalParams;
const std::vector<MemoryOrder> atomicMemoryOrders;
public:
int nesting = 0;
struct AutoNester {
TranslateToFuzzReader& parent;
size_t amount = 1;
AutoNester(TranslateToFuzzReader& parent) : parent(parent) {
parent.nesting++;
}
~AutoNester() { parent.nesting -= amount; }
// Add more nesting manually.
void add(size_t more) {
parent.nesting += more;
amount += more;
}
};
private:
// Generating random data is common enough that it's worth having helpers that
// forward to `random`.
int8_t get() { return random.get(); }
int16_t get16() { return random.get16(); }
int32_t get32() { return random.get32(); }
int64_t get64() { return random.get64(); }
float getFloat() { return random.getFloat(); }
double getDouble() { return random.getDouble(); }
Index upTo(Index x) { return random.upTo(x); }
bool oneIn(Index x) { return random.oneIn(x); }
Index upToSquared(Index x) { return random.upToSquared(x); }
// Pick from a vector-like container or a fixed list.
template<typename T> const typename T::value_type& pick(const T& vec) {
return random.pick(vec);
}
template<typename T, typename... Args> T pick(T first, Args... args) {
return random.pick(first, args...);
}
// Pick from options associated with features.
template<typename T> using FeatureOptions = Random::FeatureOptions<T>;
template<typename T> const T pick(FeatureOptions<T>& picker) {
return random.pick(picker);
}
// Setup methods
void setupMemory();
void setupHeapTypes();
void setupTables();
bool isImportableGlobalType(Type type);
bool isImportableGlobal(Global* global);
void setupGlobals();
void setupTags();
void addTag();
void finalizeMemory();
void finalizeTable();
void shuffleExports();
void prepareHangLimitSupport();
void addHangLimitSupport();
void addImportLoggingSupport();
void addImportCallingSupport();
void addImportThrowingSupport();
void addImportTableSupport();
void addImportSleepSupport();
void addHashMemorySupport();
// Special expression makers
Expression* makeHangLimitCheck();
Expression* makeImportLogging();
Expression* makeImportThrowing(Type type);
Expression* makeImportTableGet();
Expression* makeImportTableSet(Type type);
// Call either an export or a ref. We do this from a single function to better
// control the frequency of each.
Expression* makeImportCallCode(Type type);
Expression* makeImportSleep(Type type);
Expression* makeMemoryHashLogging();
// We must be careful not to add exports that have invalid public types, such
// as those that reach exact types when custom descriptors is disabled.
PublicTypeValidator publicTypeValidator;
bool isValidPublicType(Type type) {
return publicTypeValidator.isValidPublicType(type);
}
// Function operations. The main processFunctions() loop will call addFunction
// as well as modFunction().
void processFunctions();
// Add a new function.
Function* addFunction();
// Modify an existing function.
void modFunction(Function* func);
void addHangLimitChecks(Function* func);
void useImportedFunctions();
void useImportedGlobals();
// Recombination and mutation
// Recombination and mutation can replace a node with another node of the same
// type, but should not do so for certain types that are dangerous. For
// example, it would be bad to add a non-nullable reference to a tuple, as
// that would force us to use temporary locals for the tuple, but non-nullable
// references cannot always be stored in locals. Also, the 'pop' pseudo
// instruction for EH is supposed to exist only at the beginning of a 'catch'
// block, so it shouldn't be moved around or deleted freely.
bool canBeArbitrarilyReplaced(Expression* curr) {
// TODO: Remove this once we better support exact references.
if (curr->type.isExact()) {
return false;
}
return curr->type.isDefaultable() &&
!EHUtils::containsValidDanglingPop(curr);
}
void recombine(Function* func);
void mutate(Function* func);
// Fix up the IR for closed world.
void fixClosedWorld(Function* func);
// Fix up the IR after recombination and mutation (which may break the IR).
void fixAfterChanges(Function* func);
void modifyInitialFunctions();
// Note a global for use during code generation.
void useGlobalLater(Global* global);
// Initial wasm contents may have come from a test that uses the drop pattern:
//
// (drop ..something interesting..)
//
// The dropped interesting thing is optimized to some other interesting thing
// by a pass, and we verify it is the expected one. But this does not use the
// value in a way the fuzzer can notice. Replace some drops with a logging
// operation instead.
void dropToLog(Function* func);
// the fuzzer external interface sends in zeros (simpler to compare
// across invocations from JS or wasm-opt etc.). Add invocations in
// the wasm, so they run everywhere
void addInvocations(Function* func);
Name makeLabel() {
return std::string("label$") + std::to_string(funcContext->labelIndex++);
}
// Expression making methods. Always call the toplevel make(type) command, not
// the specific ones.
Expression* make(Type type);
Expression* _makeConcrete(Type type);
Expression* _makenone();
Expression* _makeunreachable();
// Make something with no chance of infinite recursion.
Expression* makeTrivial(Type type);
// We must note when we are nested in a makeTrivial() call. When we are, all
// operations must try to be as trivial as possible.
int trivialNesting = 0;
// Specific expression creators
Expression* makeBlock(Type type);
Expression* makeLoop(Type type);
Expression* makeCondition();
// Make something, with a good chance of it being a block
Expression* makeMaybeBlock(Type type);
Expression* buildIf(const struct ThreeArgs& args, Type type);
Expression* makeIf(Type type);
Expression* makeTry(Type type);
Expression* makeTryTable(Type type);
Expression* makeBreak(Type type);
Expression* makeCall(Type type);
Expression* makeCallIndirect(Type type);
Expression* makeCallRef(Type type);
Expression* makeLocalGet(Type type);
Expression* makeLocalSet(Type type);
Expression* makeGlobalGet(Type type);
Expression* makeGlobalSet(Type type);
Expression* makeTupleMake(Type type);
Expression* makeTupleExtract(Type type);
Expression* makePointer();
Expression* makeNonAtomicLoad(Type type);
Expression* makeLoad(Type type);
Expression* makeNonAtomicStore(Type type);
Expression* makeStore(Type type);
// Makes a small change to a constant value.
Literal tweak(Literal value);
Literal makeLiteral(Type type);
Expression* makeRefFuncConst(Type type);
// Emit a constant expression for a given type, as best we can. We may not be
// able to emit a literal Const, like say if the type is a function reference
// then we may emit a RefFunc, but also we may have other requirements, like
// we may add a GC cast to fixup the type.
Expression* makeConst(Type type);
// Generate reference values. One function handles basic types, and the other
// compound ones.
Expression* makeBasicRef(Type type);
Expression* makeCompoundRef(Type type);
Expression* makeStringConst();
Expression* makeStringNewArray();
Expression* makeStringNewCodePoint();
Expression* makeStringConcat();
Expression* makeStringSlice();
Expression* makeStringEq(Type type);
Expression* makeStringMeasure(Type type);
Expression* makeStringGet(Type type);
Expression* makeStringEncode(Type type);
// Similar to makeBasic/CompoundRef, but indicates that this value will be
// used in a place that will trap on null. For example, the reference of a
// struct.get or array.set would use this.
Expression* makeTrappingRefUse(HeapType type);
Expression* makeTrappingRefUse(Type type);
Expression* buildUnary(const UnaryArgs& args);
Expression* makeUnary(Type type);
Expression* buildBinary(const BinaryArgs& args);
Expression* makeBinary(Type type);
Expression* buildSelect(const ThreeArgs& args);
Expression* makeSelect(Type type);
Expression* makeSwitch(Type type);
Expression* makeDrop(Type type);
Expression* makeReturn(Type type);
Expression* makeNop(Type type);
Expression* makeUnreachable(Type type);
Expression* makeAtomic(Type type);
Expression* makeSIMD(Type type);
Expression* makeSIMDExtract(Type type);
Expression* makeSIMDReplace();
Expression* makeSIMDShuffle();
Expression* makeSIMDTernary();
Expression* makeSIMDShift();
Expression* makeSIMDLoad();
Expression* makeBulkMemory(Type type);
Expression* makeTableGet(Type type);
Expression* makeTableSet(Type type);
// TODO: support other RefIs variants, and rename this
Expression* makeRefIsNull(Type type);
Expression* makeRefEq(Type type);
Expression* makeRefTest(Type type);
Expression* makeRefCast(Type type);
Expression* makeRefGetDesc(Type type);
Expression* makeBrOn(Type type);
Expression* makeContBind(Type type);
// TODO: Expression* makeResume(Type type);
// Decide to emit a signed Struct/ArrayGet sometimes, when the field is
// packed.
bool maybeSignedGet(const Field& field);
Expression* makeStructGet(Type type);
Expression* makeStructRMW(Type type);
Expression* makeStructCmpxchg(Type type);
Expression* makeStructSet(Type type);
Expression* makeArrayGet(Type type);
Expression* makeArraySet(Type type);
Expression* makeArrayRMW(Type type);
Expression* makeArrayCmpxchg(Type type);
// Use a single method for the misc array operations, to not give them too
// much representation (e.g. compared to struct operations, which only include
// get/set).
Expression* makeArrayBulkMemoryOp(Type type);
Expression* makeI31Get(Type type);
Expression* makeThrow(Type type);
Expression* makeThrowRef(Type type);
Expression* makeMemoryInit();
Expression* makeDataDrop();
Expression* makeMemoryCopy();
Expression* makeMemoryFill();
// Getters for Types
Type getSingleConcreteType();
Type getReferenceType();
Type getCastableReferenceType();
Type getEqReferenceType();
Type getMVPType();
Type getTupleType();
Type getConcreteType();
Type getControlFlowType();
Type getStorableType();
Type getLoggableType();
bool isLoggableType(Type type);
Nullability getNullability();
Exactness getExactness();
Nullability getSubType(Nullability nullability);
Exactness getSubType(Exactness exactness);
HeapType getSubType(HeapType type);
Type getSubType(Type type);
Nullability getSuperType(Nullability nullability);
HeapType getSuperType(HeapType type);
Type getSuperType(Type type);
// Utilities
Name getTargetName(Expression* target);
Type getTargetType(Expression* target);
// Checks if a function is valid to take a ref.func of.
bool isValidRefFuncTarget(Name func);
// Checks if a function is a callRef* import (call-ref or call-ref-catch).
bool isCallRefImport(Name func);
// statistical distributions
// 0 to the limit, logarithmic scale
Index logify(Index x) {
return std::floor(std::log(std::max(Index(1) + x, Index(1))));
}
};
} // namespace wasm