Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added parallel merge sort #177

Merged
merged 2 commits into from
Mar 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion pydatastructs/linear_data_structures/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

from . import (
arrays,
linked_lists
linked_lists,
algorithms
)

from .arrays import (
Expand All @@ -18,3 +19,8 @@
DoublyCircularLinkedList
)
__all__.extend(linked_lists.__all__)

from .algorithms import (
merge_sort_parallel
)
__all__.extend(algorithms.__all__)
95 changes: 95 additions & 0 deletions pydatastructs/linear_data_structures/algorithms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
from pydatastructs.linear_data_structures.arrays import (
OneDimensionalArray, DynamicArray)
from pydatastructs.utils.misc_util import _check_type
from concurrent.futures import ThreadPoolExecutor
from math import log, floor

__all__ = [
'merge_sort_parallel'
]

def _merge(array, sl, el, sr, er, end):
l, r = [], []
for i in range(sl, el + 1):
if (i <= end and
array[i] is not None):
l.append(array[i])
array[i] = None
for i in range(sr, er + 1):
if (i <= end and
array[i] is not None):
r.append(array[i])
array[i] = None
i, j, k = 0, 0, sl
while i < len(l) and j < len(r):
if l[i] <= r[j]:
array[k] = l[i]
i += 1
else:
array[k] = r[j]
j += 1
k += 1

while i < len(l):
array[k] = l[i]
i += 1
k += 1

while j < len(r):
array[k] = r[j]
j += 1
k += 1

def merge_sort_parallel(array, num_threads, **kwargs):
    """
    Implements parallel merge sort.

    The requested portion of the array is sorted in place, bottom-up:
    each pass merges neighbouring runs of width 1, 2, 4, ... and the
    merges of one pass, which cover disjoint index ranges, are handed
    to a thread pool together. A pass is completed before the next
    one starts.

    Parameters
    ==========

    array: Array
        The array which is to be sorted.
    num_threads: int
        The maximum number of threads
        to be used for sorting.
    start: int
        The starting index of the portion
        which is to be sorted.
        Optional, by default 0
    end: int
        The ending index of the portion which
        is to be sorted.
        Optional, by default ``array._size - 1``.

    Notes
    =====

    ``None`` entries inside the portion are skipped while merging, so
    the sorted values end up packed at the front of the portion.
    If the portion is empty (``end < start``) nothing is done.
    For a ``DynamicArray`` the array is re-packed via
    ``_modify(force=True)`` after sorting.

    Examples
    ========

    >>> from pydatastructs import OneDimensionalArray, merge_sort_parallel
    >>> arr = OneDimensionalArray(int,[3, 2, 1])
    >>> merge_sort_parallel(arr, 3)
    >>> [arr[0], arr[1], arr[2]]
    [1, 2, 3]

    References
    ==========

    .. [1] https://en.wikipedia.org/wiki/Merge_sort
    """
    start = kwargs.get('start', 0)
    end = kwargs.get('end', array._size - 1)

    span = end - start + 1
    if span <= 0:
        # Empty portion: there is nothing to sort (and no logarithm
        # of a non-positive length to take).
        return

    # bit_length() is exactly floor(log2(span)) + 1, without going
    # through floating point.
    num_passes = int(span).bit_length()

    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        for level in range(num_passes):
            width = 2**level
            # Submit every merge of this pass before waiting on any of
            # them, so that they can actually run concurrently.
            pending = [
                executor.submit(
                    _merge,
                    array,
                    left, left + width - 1,
                    left + width, left + 2*width - 1,
                    end)
                for left in range(start, end + 1, 2*width)
            ]
            # Barrier between passes; result() also re-raises any
            # exception thrown inside a worker.
            for future in pending:
                future.result()

    if _check_type(array, DynamicArray):
        array._modify(force=True)
10 changes: 4 additions & 6 deletions pydatastructs/linear_data_structures/arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,12 +209,12 @@ def __new__(cls, dtype=NoneType, *args, **kwargs):
obj._last_pos_filled = obj._num - 1
return obj

def _modify(self):
def _modify(self, force=False):
"""
Contracts the array if Num(T)/Size(T) falls
below load factor.
"""
if self._num/self._size < self._load_factor:
if (self._num/self._size < self._load_factor) or force:
arr_new = OneDimensionalArray(self._dtype, 2*self._num + 1)
j = 0
for i in range(self._last_pos_filled + 1):
Expand All @@ -231,14 +231,12 @@ def append(self, el):
for i in range(self._last_pos_filled + 1):
arr_new[i] = self[i]
arr_new[self._last_pos_filled + 1] = el
self._last_pos_filled += 1
self._size = arr_new._size
self._num += 1
self._data = arr_new._data
else:
self[self._last_pos_filled + 1] = el
self._last_pos_filled += 1
self._num += 1
self._last_pos_filled += 1
self._num += 1
self._modify()

def delete(self, idx):
Expand Down
33 changes: 33 additions & 0 deletions pydatastructs/linear_data_structures/tests/test_algorithm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from pydatastructs import (
merge_sort_parallel, DynamicOneDimensionalArray,
OneDimensionalArray)
import random

def test_merge_sort_parallel():
    """
    Runs ``merge_sort_parallel`` on a sub-range of a dynamic array and
    of a fixed-size array and compares the backing ``_data`` list with
    the contents recorded for the fixed random seed.
    """
    # The expected contents below are tied to this seed and to the
    # exact order of the random calls that follow.
    random.seed(1000)

    # Dynamic array: append values, delete some of them, then sort
    # with start=2, end=10.
    count = random.randint(10, 20)
    dynamic = DynamicOneDimensionalArray(int, 0)
    for _ in range(count):
        dynamic.append(random.randint(1, 1000))
    for _ in range(count//3):
        dynamic.delete(random.randint(0, count//2))
    merge_sort_parallel(dynamic, 5, start=2, end=10)
    dynamic_expected = [
        686, 779, 102, 134, 362,
        448, 480, 548, 228, 688,
        247, 373, 696,
    ] + [None]*14
    assert dynamic._data == dynamic_expected

    # Fixed-size array: fill every slot, then sort with start=2, end=5.
    count = random.randint(10, 20)
    fixed = OneDimensionalArray(int, count)
    for idx in range(count):
        fixed[idx] = random.randint(1, 1000)
    merge_sort_parallel(fixed, 5, start=2, end=5)
    fixed_expected = [
        42, 695, 147, 500, 768,
        998, 473, 732, 728, 426,
        709, 910,
    ]
    assert fixed._data == fixed_expected