Skip to content
This repository has been archived by the owner on Apr 3, 2020. It is now read-only.

Commit

Permalink
[regexp] do not assume short external strings have a minimum size.
Browse files Browse the repository at this point in the history
Short external strings do not cache the resource data, and may be used
for compressible strings. The assumptions about their lengths are
invalid and may lead to out-of-bounds reads.

R=jkummerow@chromium.org
BUG=v8:4923,chromium:604897
LOG=N

Review URL: https://codereview.chromium.org/1901573003

Cr-Commit-Position: refs/heads/master@{#35660}
  • Loading branch information
hashseed authored and Commit bot committed Apr 20, 2016
1 parent 4e93ce4 commit 3518e49
Show file tree
Hide file tree
Showing 9 changed files with 174 additions and 219 deletions.
67 changes: 28 additions & 39 deletions src/arm/code-stubs-arm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1577,70 +1577,59 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
__ ldr(subject, MemOperand(sp, kSubjectOffset));
__ JumpIfSmi(subject, &runtime);
__ mov(r3, subject); // Make a copy of the original subject string.
__ ldr(r0, FieldMemOperand(subject, HeapObject::kMapOffset));
__ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset));
// subject: subject string
// r3: subject string
// r0: subject string instance type
// regexp_data: RegExp data (FixedArray)
// Handle subject string according to its encoding and representation:
// (1) Sequential string? If yes, go to (5).
// (2) Anything but sequential or cons? If yes, go to (6).
// (3) Cons string. If the string is flat, replace subject with first string.
// Otherwise bailout.
// (4) Is subject external? If yes, go to (7).
// (5) Sequential string. Load regexp code according to encoding.
// (1) Sequential string? If yes, go to (4).
// (2) Sequential or cons? If not, go to (5).
// (3) Cons string. If the string is flat, replace subject with first string
// and go to (1). Otherwise bail out to runtime.
// (4) Sequential string. Load regexp code according to encoding.
// (E) Carry on.
/// [...]

// Deferred code at the end of the stub:
// (6) Not a long external string? If yes, go to (8).
// (7) External string. Make it, offset-wise, look like a sequential string.
// Go to (5).
// (8) Short external string or not a string? If yes, bail out to runtime.
// (9) Sliced string. Replace subject with parent. Go to (4).
// (5) Long external string? If not, go to (7).
// (6) External string. Make it, offset-wise, look like a sequential string.
// Go to (4).
// (7) Short external string or not a string? If yes, bail out to runtime.
// (8) Sliced string. Replace subject with parent. Go to (1).

Label seq_string /* 4 */, external_string /* 6 */, check_underlying /* 1 */,
not_seq_nor_cons /* 5 */, not_long_external /* 7 */;

Label seq_string /* 5 */, external_string /* 7 */,
check_underlying /* 4 */, not_seq_nor_cons /* 6 */,
not_long_external /* 8 */;
__ bind(&check_underlying);
__ ldr(r0, FieldMemOperand(subject, HeapObject::kMapOffset));
__ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset));

// (1) Sequential string? If yes, go to (5).
// (1) Sequential string? If yes, go to (4).
__ and_(r1,
r0,
Operand(kIsNotStringMask |
kStringRepresentationMask |
kShortExternalStringMask),
SetCC);
STATIC_ASSERT((kStringTag | kSeqStringTag) == 0);
__ b(eq, &seq_string); // Go to (5).
__ b(eq, &seq_string); // Go to (4).

// (2) Anything but sequential or cons? If yes, go to (6).
// (2) Sequential or cons? If not, go to (5).
STATIC_ASSERT(kConsStringTag < kExternalStringTag);
STATIC_ASSERT(kSlicedStringTag > kExternalStringTag);
STATIC_ASSERT(kIsNotStringMask > kExternalStringTag);
STATIC_ASSERT(kShortExternalStringTag > kExternalStringTag);
__ cmp(r1, Operand(kExternalStringTag));
__ b(ge, &not_seq_nor_cons); // Go to (6).
__ b(ge, &not_seq_nor_cons); // Go to (5).

// (3) Cons string. Check that it's flat.
// Replace subject with first string and reload instance type.
__ ldr(r0, FieldMemOperand(subject, ConsString::kSecondOffset));
__ CompareRoot(r0, Heap::kempty_stringRootIndex);
__ b(ne, &runtime);
__ ldr(subject, FieldMemOperand(subject, ConsString::kFirstOffset));
__ jmp(&check_underlying);

// (4) Is subject external? If yes, go to (7).
__ bind(&check_underlying);
__ ldr(r0, FieldMemOperand(subject, HeapObject::kMapOffset));
__ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset));
STATIC_ASSERT(kSeqStringTag == 0);
__ tst(r0, Operand(kStringRepresentationMask));
// The underlying external string is never a short external string.
STATIC_ASSERT(ExternalString::kMaxShortLength < ConsString::kMinLength);
STATIC_ASSERT(ExternalString::kMaxShortLength < SlicedString::kMinLength);
__ b(ne, &external_string); // Go to (7).

// (5) Sequential string. Load regexp code according to encoding.
// (4) Sequential string. Load regexp code according to encoding.
__ bind(&seq_string);
// subject: sequential subject string (or look-alike, external string)
// r3: original subject string
Expand Down Expand Up @@ -1873,12 +1862,12 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
__ TailCallRuntime(Runtime::kRegExpExec);

// Deferred code for string handling.
// (6) Not a long external string? If yes, go to (8).
// (5) Long external string? If not, go to (7).
__ bind(&not_seq_nor_cons);
// Compare flags are still set.
__ b(gt, &not_long_external); // Go to (8).
__ b(gt, &not_long_external); // Go to (7).

// (7) External string. Make it, offset-wise, look like a sequential string.
// (6) External string. Make it, offset-wise, look like a sequential string.
__ bind(&external_string);
__ ldr(r0, FieldMemOperand(subject, HeapObject::kMapOffset));
__ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset));
Expand All @@ -1895,15 +1884,15 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
__ sub(subject,
subject,
Operand(SeqTwoByteString::kHeaderSize - kHeapObjectTag));
__ jmp(&seq_string); // Go to (5).
__ jmp(&seq_string); // Go to (4).

// (8) Short external string or not a string? If yes, bail out to runtime.
// (7) Short external string or not a string? If yes, bail out to runtime.
__ bind(&not_long_external);
STATIC_ASSERT(kNotStringTag != 0 && kShortExternalStringTag !=0);
__ tst(r1, Operand(kIsNotStringMask | kShortExternalStringMask));
__ b(ne, &runtime);

// (9) Sliced string. Replace subject with parent. Go to (4).
// (8) Sliced string. Replace subject with parent. Go to (1).
// Load offset into r9 and replace subject string with parent.
__ ldr(r9, FieldMemOperand(subject, SlicedString::kOffsetOffset));
__ SmiUntag(r9);
Expand Down
80 changes: 34 additions & 46 deletions src/arm64/code-stubs-arm64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1743,35 +1743,35 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
__ Peek(subject, kSubjectOffset);
__ JumpIfSmi(subject, &runtime);

__ Ldr(x10, FieldMemOperand(subject, HeapObject::kMapOffset));
__ Ldrb(string_type, FieldMemOperand(x10, Map::kInstanceTypeOffset));

__ Ldr(jsstring_length, FieldMemOperand(subject, String::kLengthOffset));

// Handle subject string according to its encoding and representation:
// (1) Sequential string? If yes, go to (5).
// (2) Anything but sequential or cons? If yes, go to (6).
// (3) Cons string. If the string is flat, replace subject with first string.
// Otherwise bailout.
// (4) Is subject external? If yes, go to (7).
// (5) Sequential string. Load regexp code according to encoding.
// (1) Sequential string? If yes, go to (4).
// (2) Sequential or cons? If not, go to (5).
// (3) Cons string. If the string is flat, replace subject with first string
// and go to (1). Otherwise bail out to runtime.
// (4) Sequential string. Load regexp code according to encoding.
// (E) Carry on.
/// [...]

// Deferred code at the end of the stub:
// (6) Not a long external string? If yes, go to (8).
// (7) External string. Make it, offset-wise, look like a sequential string.
// Go to (5).
// (8) Short external string or not a string? If yes, bail out to runtime.
// (9) Sliced string. Replace subject with parent. Go to (4).

Label check_underlying; // (4)
Label seq_string; // (5)
Label not_seq_nor_cons; // (6)
Label external_string; // (7)
Label not_long_external; // (8)

// (1) Sequential string? If yes, go to (5).
// (5) Long external string? If not, go to (7).
// (6) External string. Make it, offset-wise, look like a sequential string.
// Go to (4).
// (7) Short external string or not a string? If yes, bail out to runtime.
// (8) Sliced string. Replace subject with parent. Go to (1).

Label check_underlying; // (1)
Label seq_string; // (4)
Label not_seq_nor_cons; // (5)
Label external_string; // (6)
Label not_long_external; // (7)

__ Bind(&check_underlying);
__ Ldr(x10, FieldMemOperand(subject, HeapObject::kMapOffset));
__ Ldrb(string_type, FieldMemOperand(x10, Map::kInstanceTypeOffset));

// (1) Sequential string? If yes, go to (4).
__ And(string_representation,
string_type,
kIsNotStringMask |
Expand All @@ -1788,36 +1788,24 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
// is a String
STATIC_ASSERT((kStringTag | kSeqStringTag) == 0);
STATIC_ASSERT(kShortExternalStringTag != 0);
__ Cbz(string_representation, &seq_string); // Go to (5).
__ Cbz(string_representation, &seq_string); // Go to (4).

// (2) Anything but sequential or cons? If yes, go to (6).
// (2) Sequential or cons? If not, go to (5).
STATIC_ASSERT(kConsStringTag < kExternalStringTag);
STATIC_ASSERT(kSlicedStringTag > kExternalStringTag);
STATIC_ASSERT(kIsNotStringMask > kExternalStringTag);
STATIC_ASSERT(kShortExternalStringTag > kExternalStringTag);
__ Cmp(string_representation, kExternalStringTag);
__ B(ge, &not_seq_nor_cons); // Go to (6).
__ B(ge, &not_seq_nor_cons); // Go to (5).

// (3) Cons string. Check that it's flat.
__ Ldr(x10, FieldMemOperand(subject, ConsString::kSecondOffset));
__ JumpIfNotRoot(x10, Heap::kempty_stringRootIndex, &runtime);
// Replace subject with first string.
__ Ldr(subject, FieldMemOperand(subject, ConsString::kFirstOffset));
__ B(&check_underlying);

// (4) Is subject external? If yes, go to (7).
__ Bind(&check_underlying);
// Reload the string type.
__ Ldr(x10, FieldMemOperand(subject, HeapObject::kMapOffset));
__ Ldrb(string_type, FieldMemOperand(x10, Map::kInstanceTypeOffset));
STATIC_ASSERT(kSeqStringTag == 0);
// The underlying external string is never a short external string.
STATIC_ASSERT(ExternalString::kMaxShortLength < ConsString::kMinLength);
STATIC_ASSERT(ExternalString::kMaxShortLength < SlicedString::kMinLength);
__ TestAndBranchIfAnySet(string_type.X(),
kStringRepresentationMask,
&external_string); // Go to (7).

// (5) Sequential string. Load regexp code according to encoding.
// (4) Sequential string. Load regexp code according to encoding.
__ Bind(&seq_string);

// Check that the third argument is a positive smi less than the subject
Expand Down Expand Up @@ -2087,12 +2075,12 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
__ TailCallRuntime(Runtime::kRegExpExec);

// Deferred code for string handling.
// (6) Not a long external string? If yes, go to (8).
// (5) Long external string? If not, go to (7).
__ Bind(&not_seq_nor_cons);
// Compare flags are still set.
__ B(ne, &not_long_external); // Go to (8).
__ B(ne, &not_long_external); // Go to (7).

// (7) External string. Make it, offset-wise, look like a sequential string.
// (6) External string. Make it, offset-wise, look like a sequential string.
__ Bind(&external_string);
if (masm->emit_debug_code()) {
// Assert that we do not have a cons or slice (indirect strings) here.
Expand All @@ -2110,21 +2098,21 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
// Move the pointer so that offset-wise, it looks like a sequential string.
STATIC_ASSERT(SeqTwoByteString::kHeaderSize == SeqOneByteString::kHeaderSize);
__ Sub(subject, subject, SeqTwoByteString::kHeaderSize - kHeapObjectTag);
__ B(&seq_string); // Go to (5).
__ B(&seq_string); // Go to (4).

// (8) If this is a short external string or not a string, bail out to
// (7) If this is a short external string or not a string, bail out to
// runtime.
__ Bind(&not_long_external);
STATIC_ASSERT(kShortExternalStringTag != 0);
__ TestAndBranchIfAnySet(string_representation,
kShortExternalStringMask | kIsNotStringMask,
&runtime);

// (9) Sliced string. Replace subject with parent.
// (8) Sliced string. Replace subject with parent.
__ Ldr(sliced_string_offset,
UntagSmiFieldMemOperand(subject, SlicedString::kOffsetOffset));
__ Ldr(subject, FieldMemOperand(subject, SlicedString::kParentOffset));
__ B(&check_underlying); // Go to (4).
__ B(&check_underlying); // Go to (1).
#endif
}

Expand Down
Loading

0 comments on commit 3518e49

Please sign in to comment.