Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft: Test that /messages works on remote homeserver and can be backfilled properly after many batches (MSC2716) #214

Closed
Changes from 17 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
8099f47
Test that /messages can be backfilled properly after many batches
MadLittleMods Oct 19, 2021
094c5f7
Add message suffix to better distinguish messages
MadLittleMods Oct 21, 2021
e30bcd4
v4 room version
MadLittleMods Oct 30, 2021
1e333d6
Make sure marker event is sent
MadLittleMods Nov 3, 2021
2022b31
Cleanup tests
MadLittleMods Nov 3, 2021
d325349
Some test cleanup and comments
MadLittleMods Nov 3, 2021
2fe5180
Delays not needed for test servers (get result ASAP)
MadLittleMods Nov 11, 2021
0604564
Fix typo
MadLittleMods Nov 11, 2021
83adbe2
Merge branch 'master' into madlittlemods/test-backfill-and-messages-s…
MadLittleMods Nov 11, 2021
4aba836
Fix missing params after merge
MadLittleMods Nov 11, 2021
ffbca43
Make sure historical state doesn't appear between batches
MadLittleMods Dec 2, 2021
37109fa
Re-use JSONArrayEach
MadLittleMods Dec 2, 2021
cc7236b
Merge branch 'master' into madlittlemods/test-backfill-and-messages-s…
MadLittleMods Dec 17, 2021
4c8284a
v4 was never merged
MadLittleMods Dec 17, 2021
9b90429
Merge branch 'master' into madlittlemods/test-backfill-and-messages-s…
MadLittleMods Dec 17, 2021
677836b
Merge branch 'master' into madlittlemods/test-backfill-and-messages-s…
MadLittleMods Jan 13, 2022
85eb7bd
Merge branch 'main' into madlittlemods/test-backfill-and-messages-sti…
MadLittleMods Mar 30, 2022
3532821
Merge branch 'main' into madlittlemods/test-backfill-and-messages-sti…
MadLittleMods May 13, 2022
1667e15
Merge branch 'main' into madlittlemods/test-backfill-and-messages-sti…
MadLittleMods Aug 10, 2022
0589546
Merge branch 'main' into madlittlemods/test-backfill-and-messages-sti…
MadLittleMods Sep 20, 2022
606197a
Update test name
MadLittleMods Sep 21, 2022
d679384
Changes and debugging
MadLittleMods Sep 29, 2022
230c46e
Mulitply timeout by the number of requests we expect
MadLittleMods Sep 29, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 171 additions & 5 deletions tests/msc2716_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -876,6 +876,77 @@ func TestImportHistoricalMessages(t *testing.T) {
},
})
})

t.Run("Backfill still works after many batches are imported", func(t *testing.T) {
MadLittleMods marked this conversation as resolved.
Show resolved Hide resolved
t.Parallel()

roomID := as.CreateRoom(t, createPublicRoomOpts)
alice.JoinRoom(t, roomID, nil)

// Create some normal messages in the timeline
eventIDsBefore := createMessagesInRoom(t, alice, roomID, 2, "eventIDsBefore")
eventIdBefore := eventIDsBefore[len(eventIDsBefore)-1]
timeAfterEventBefore := time.Now()

// We chose the magic number 11 because Synapse currently limits the
// backfill extremities to 5. 10 also seemed like a round number someone
// could pick for other homeserver implementations so I just did 10+1 to
// make sure it also worked in that case.
//numBatches := 11
numBatches := 2
numHistoricalMessagesPerBatch := 100
// wait X number of ms to ensure that the timestamp changes enough for
// each of the historical messages we try to import later
time.Sleep(time.Duration(numBatches*numHistoricalMessagesPerBatch) * timeBetweenMessages)

// eventIDsAfter
createMessagesInRoom(t, alice, roomID, 2, "eventIDsAfter")

// Import a long chain of batches connected to each other.
// We want to make sure Synapse doesn't blow up after we import
// many messages.
var expectedEventIDs []string
var denyListEventIDs []string
var baseInsertionEventID string
nextBatchID := ""
for i := 0; i < numBatches; i++ {
insertTime := timeAfterEventBefore.Add(timeBetweenMessages * time.Duration(numBatches-numHistoricalMessagesPerBatch*i))
batchSendRes := batchSendHistoricalMessages(
t,
as,
roomID,
eventIdBefore,
nextBatchID,
createJoinStateEventsForBatchSendRequest([]string{virtualUserID}, insertTime),
createMessageEventsForBatchSendRequest([]string{virtualUserID}, insertTime, numHistoricalMessagesPerBatch),
// Status
200,
)
batchSendResBody := client.ParseJSON(t, batchSendRes)
// Make sure we see all of the historical messages
expectedEventIDs = append(expectedEventIDs, client.GetJSONFieldStringArray(t, batchSendResBody, "event_ids")...)
// We should not find any historical state between the batches of messages
denyListEventIDs = append(denyListEventIDs, client.GetJSONFieldStringArray(t, batchSendResBody, "state_event_ids")...)
nextBatchID = client.GetJSONFieldStr(t, batchSendResBody, "next_batch_id")

// Grab the base insertion event ID to reference later in the marker event
if i == 0 {
baseInsertionEventID = client.GetJSONFieldStr(t, batchSendResBody, "base_insertion_event_id")
}
}

// Make sure we see the events at the very start of the message history
expectedEventIDs = append(expectedEventIDs, eventIDsBefore...)

// Join the room from a remote homeserver after the historical messages were sent
remoteCharlie.JoinRoom(t, roomID, []string{"hs1"})

// Send the marker event
sendMarkerAndEnsureBackfilled(t, as, remoteCharlie, roomID, baseInsertionEventID)

// Make sure events can be backfilled from the remote homeserver
paginateUntilMessageCheckOff(t, remoteCharlie, roomID, expectedEventIDs, denyListEventIDs)
})
})

t.Run("Existing room versions", func(t *testing.T) {
Expand Down Expand Up @@ -1017,10 +1088,10 @@ func includes(needle string, haystack []string) bool {
func fetchUntilMessagesResponseHas(t *testing.T, c *client.CSAPI, roomID string, check func(gjson.Result) bool) {
t.Helper()
start := time.Now()
checkCounter := 0
callCounter := 0
for {
if time.Since(start) > c.SyncUntilTimeout {
t.Fatalf("fetchUntilMessagesResponseHas timed out. Called check function %d times", checkCounter)
t.Fatalf("fetchUntilMessagesResponseHas timed out. Called check function %d times", callCounter)
}

messagesRes := c.MustDoFunc(t, "GET", []string{"_matrix", "client", "r0", "rooms", roomID, "messages"}, client.WithContentType("application/json"), client.WithQueries(url.Values{
Expand All @@ -1044,9 +1115,104 @@ func fetchUntilMessagesResponseHas(t *testing.T, c *client.CSAPI, roomID string,
}
}

checkCounter++
// Add a slight delay so we don't hammmer the messages endpoint
time.Sleep(500 * time.Millisecond)
callCounter++
}
}

// Paginate the /messages endpoint until we find all of the expectedEventIds
// (order does not matter). If any event in denyListEventIDs is found, an error
// will be thrown.
func paginateUntilMessageCheckOff(t *testing.T, c *client.CSAPI, roomID string, expectedEventIDs []string, denyListEventIDs []string) {
t.Helper()
start := time.Now()

workingExpectedEventIDMap := make(map[string]string)
for _, expectedEventID := range expectedEventIDs {
workingExpectedEventIDMap[expectedEventID] = expectedEventID
}

denyEventIDMap := make(map[string]string)
for _, denyEventID := range denyListEventIDs {
denyEventIDMap[denyEventID] = denyEventID
}

var actualEventIDList []string
callCounter := 0
messageResEnd := ""
generateErrorMesssageInfo := func() string {
i := 0
leftoverEventIDs := make([]string, len(workingExpectedEventIDMap))
for eventID := range workingExpectedEventIDMap {
leftoverEventIDs[i] = eventID
i++
}

return fmt.Sprintf("Called /messages %d times but only found %d/%d expected messages. Leftover messages we expected (%d): %s. We saw %d events over all of the API calls: %s",
callCounter,
len(expectedEventIDs)-len(leftoverEventIDs),
len(expectedEventIDs),
len(leftoverEventIDs),
leftoverEventIDs,
len(actualEventIDList),
actualEventIDList,
)
}

for {
if time.Since(start) > 200*c.SyncUntilTimeout {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This timeout is currently very large to accommodate the long ~20s /messages durations. We also have to make that request 11 times during the test which adds up very fast.

Synapse really has to chug for those requests 👹 and ideally wouldn't have to modify this at all.

I would need to look into optimizing Synapse to make this fast which we should probably do anyway as this is painfully slow.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is currently set to a whopping 1000s by default

SyncUntilTimeout: 5 * time.Second,
which seems excessive even given this.

Copy link
Contributor Author

@MadLittleMods MadLittleMods Nov 10, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kegsay Sorry this wasn't clear as undrafting indicates "marked this pull request as ready for review" but I didn't assign you this one yet specifically because of this problem.

The test itself is good to go (timeout can be switched to normal and numBatches could be switched back to 11) and shipped but want to make it actually acceptable time-wise to run against Synapse before merging.

I used numBatches=2 during testing because it's much faster to see results while developing.

Thanks for the review pass though and I'll fix up these other spots ⏩

Copy link
Contributor Author

@MadLittleMods MadLittleMods May 11, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MadLittleMods what is the status of this PR? It's still marked as a Draft.

#214 (comment)

Status is still the same as the last update in this thread. It's too slow on Synapse for me to be comfortable merging it yet.

Copy link
Contributor Author

@MadLittleMods MadLittleMods May 23, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In terms of optimizing Synapse to make this test viable to run time-wise, I'm a bit blocked on a race condition in some recent code, matrix-org/synapse#12394 (comment) -> matrix-org/synapse#12646

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In terms of optimizing Synapse to make this test viable to run time-wise, I'm a bit blocked on a race condition in some recent code, matrix-org/synapse#12394 (comment) -> matrix-org/synapse#12646

Now matrix-org/synapse#12988 I think

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

t.Fatalf(
"paginateUntilMessageCheckOff timed out. %s",
generateErrorMesssageInfo(),
)
}

messagesRes := c.MustDoFunc(t, "GET", []string{"_matrix", "client", "r0", "rooms", roomID, "messages"}, client.WithContentType("application/json"), client.WithQueries(url.Values{
"dir": []string{"b"},
"limit": []string{"100"},
"from": []string{messageResEnd},
}))
callCounter++
messsageResBody := client.ParseJSON(t, messagesRes)
messageResEnd = client.GetJSONFieldStr(t, messsageResBody, "end")
// Since the original body can only be read once, create a new one from the body bytes we just read
messagesRes.Body = ioutil.NopCloser(bytes.NewBuffer(messsageResBody))

foundEventInMessageResponse := false
must.MatchResponse(t, messagesRes, match.HTTPResponse{
JSON: []match.JSON{
match.JSONArrayEach("chunk", func(ev gjson.Result) error {
foundEventInMessageResponse = true
eventID := ev.Get("event_id").Str
actualEventIDList = append(actualEventIDList, eventID)

if _, keyExists := denyEventIDMap[eventID]; keyExists {
return fmt.Errorf(
"paginateUntilMessageCheckOff found unexpected message=%s in deny list while paginating. %s",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👆 Calling out a change since the last review pass. I added a denyListEventIDs so we can ensure that the historical state doesn't appear between batches of historical messages while we're paginating.

eventID,
generateErrorMesssageInfo(),
)
}

if _, keyExists := workingExpectedEventIDMap[eventID]; keyExists {
delete(workingExpectedEventIDMap, eventID)
}

return nil
}),
},
})

if !foundEventInMessageResponse {
t.Fatalf(
"paginateUntilMessageCheckOff reached the end of the messages without finding all expected events. %s",
generateErrorMesssageInfo(),
)
}

// We were able to find all of the expected events!
if len(workingExpectedEventIDMap) == 0 {
return
}
}
}

Expand Down