-
Notifications
You must be signed in to change notification settings - Fork 5
/
compaction_optimization.sh
executable file
·151 lines (120 loc) · 5.21 KB
/
compaction_optimization.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/bin/bash
#
# Compaction optimization test.
#
# Weaviate is started with the maximum LSM segment size forced down to a low
# value (5MB) so that we can verify compaction keeps going even when the first
# segment is already larger than that limit. The scenario was observed at a
# customer whose LSM store had grown massively because compaction was not
# behaving as expected; this test guards against that.
set -e

source common.sh

# Import parameters; both are forwarded to the importer container via -e.
SIZE=10000
BATCH_SIZE=1024

echo "Building all required containers"
(
  cd apps/importer-no-vector-index/ || exit 1
  docker build -t importer-no-vector .
)
(
  cd apps/analyze-segments/ || exit 1
  docker build -t analyzer .
)

# Compose file handed to `docker compose -f`.
export COMPOSE="apps/weaviate-no-restart-on-crash/docker-compose.yml"

echo "Starting Weaviate..."
PERSISTENCE_MEMTABLES_FLUSH_IDLE_AFTER_SECONDS=1 \
  PERSISTENCE_LSM_MAX_SEGMENT_SIZE="5MB" \
  docker compose -f "$COMPOSE" up -d

# wait_weaviate is not defined in this file; presumably it comes from common.sh.
wait_weaviate
#######################################
# Print the logs of the services described by the compose file.
# Globals:   COMPOSE (read) - path of the docker compose file
# Arguments: none
# Outputs:   whatever `docker compose logs` writes
# Returns:   exit status of `docker compose logs`
#######################################
dump_logs() {
  # Quoted so that a compose path containing whitespace stays one argument.
  docker compose -f "$COMPOSE" logs
}
# Dump the compose logs whenever a command fails and trips the ERR trap.
trap 'dump_logs' ERR

echo "Run import script in foreground..."
# The importer is the (negated) condition of an `if`, so its failure does NOT
# fire the ERR trap, and neither does the explicit `exit` below. The logs
# announced in the message therefore have to be printed by hand.
if ! docker run \
  -e 'SHARDS=1' \
  -e "SIZE=$SIZE" \
  -e "BATCH_SIZE=$BATCH_SIZE" \
  -e 'ORIGIN=http://localhost:8080' \
  --network host \
  -t importer-no-vector; then
  echo "Importer failed, printing latest Weaviate logs..."
  # Best effort: a problem while collecting logs must not mask the real error.
  dump_logs || true
  exit 1
fi
class_name="novector"
echo "Run analize segments script"

# Locate the single entry below the class' data directory. Its name is kept in
# $dir because the later analyzer runs build their mount path from it.
# A glob is used instead of parsing `ls`, and an empty listing is rejected
# explicitly (counting lines of an empty string with `wc -l` reports 1, which
# would let an empty data directory slip through).
data_dir="./apps/weaviate/data/${class_name}"
entries=()
for entry in "${data_dir}"/*; do
  # With no match the pattern stays literal; skip anything that does not exist.
  [[ -e "$entry" || -L "$entry" ]] || continue
  entries+=("${entry##*/}")
done

if ((${#entries[@]} == 0)); then
  echo "Error: No directories found in ${data_dir}"
  exit 1
fi
if ((${#entries[@]} > 1)); then
  echo "Error: Multiple directories found in ./apps/weaviate/data/${class_name}"
  printf '%s\n' "${entries[@]}"
  exit 1
fi
dir="${entries[0]}"

echo "Segments analysis with max LSM segment size of 5MB:"
output=$(docker run --network host \
  -v "./apps/weaviate/data/${class_name}/${dir}/lsm/objects:/lsm_objects" \
  -t analyzer /app/analyzer --path /lsm_objects)
echo "$output"
# Maximum segment size, in MB, used for the second Weaviate run.
new_size=15

# Restart Weaviate with the max segment size raised to ${new_size}MB. Both
# calls go through $COMPOSE so they always target the same compose file.
docker compose -f "$COMPOSE" down
PERSISTENCE_MEMTABLES_FLUSH_IDLE_AFTER_SECONDS=1 \
  PERSISTENCE_LSM_MAX_SEGMENT_SIZE="${new_size}MB" \
  docker compose -f "$COMPOSE" up -d

echo "Checking segment levels after max LSM segment increased to ${new_size}MB..."

# Wait for the compaction to occur by checking the number of segments every
# 3 seconds. A decreasing count means compaction is still ongoing; once the
# count has not decreased for 5 consecutive checks, compaction is considered
# finished. Reaching the timeout only ends the wait: the script carries on and
# the later checks decide whether the test passes.
timeout=120
start_time=$(date +%s)
prev_count=0
same_count=0
while true; do
  output=$(docker run --network host \
    -v "./apps/weaviate/data/${class_name}/${dir}/lsm/objects:/lsm_objects" \
    -t analyzer /app/analyzer --path /lsm_objects)

  # Count the rows after the first 3 lines that carry a third column (the
  # level). Counting inside awk yields 0 (not 1) when there are no rows.
  levels_count=$(awk 'NR>3 && NF>=3 {n++} END {print n+0}' <<< "$output")

  # A shrinking count resets the stability counter.
  if ((levels_count < prev_count)); then
    same_count=0
  else
    same_count=$((same_count + 1))
  fi

  # If for 5 consecutive checks the count did not shrink, stop waiting.
  if ((same_count >= 5)); then
    echo "Compaction finished. All segments were compacted."
    break
  fi

  # Check for timeout.
  elapsed_time=$(($(date +%s) - start_time))
  if ((elapsed_time >= timeout)); then
    echo "Timeout reached. Compaction did not finish in the expected time."
    break
  fi

  prev_count=$levels_count
  echo "Compaction ongoing. Waiting..."
  sleep 3
done
echo "$output"
echo ""
# Once compaction has settled, look at every pair of consecutive segments that
# share the same level: their combined size must not be below ${new_size}MB,
# otherwise the test fails.
output=$(docker run --network host \
  -v "./apps/weaviate/data/${class_name}/${dir}/lsm/objects:/lsm_objects" \
  -t analyzer /app/analyzer --path /lsm_objects)

# Take column 2 (size) and column 3 (level) of every line after the first 3.
# Sizes are compared against new_size * 1000000 below, i.e. treated as bytes.
# The container runs with -t, so carriage returns are stripped before parsing.
segment_sizes=()
segment_levels=()
while read -r size level; do
  [[ -n "$size" ]] || continue
  segment_sizes+=("$size")
  segment_levels+=("$level")
done < <(awk 'NR>3 {print $2, $3}' <<< "$output" | tr -d '\r')

total_segments=${#segment_sizes[@]}
min_combined=$((new_size * 1000000))

for ((i = 0; i < total_segments - 1; i++)); do
  next=$((i + 1))
  level=${segment_levels[i]}

  # Only consecutive segments on the same level are compared.
  [[ "$level" == "${segment_levels[next]}" ]] || continue

  combined_size=$((${segment_sizes[i]} + ${segment_sizes[next]}))
  if ((combined_size < min_combined)); then
    # Shell arithmetic instead of `expr`: expr exits with status 1 when the
    # result is 0, which under `set -e` would abort the script before the
    # diagnostics below are printed for combined sizes under 1MB.
    combined_size_mb=$((combined_size / 1000000))
    echo "Combined size of segment ${i} and segment ${next} (LEVEL ${level}) is less than ${new_size}MB."
    echo "Combined size: ${combined_size_mb}MB"
    echo "Test failed."
    exit 1
  fi
done

# If the loop completes without failure, the test passed.
echo "All segment pairs with the same level have a combined size larger than ${new_size}MB. Test passed."
echo "Passed!"
# shutdown is not defined in this file; presumably it comes from common.sh.
shutdown