From 183efca322c1929365a3fa5d5187d86d9b1639ce Mon Sep 17 00:00:00 2001
From: czgdp1807
Date: Thu, 19 Mar 2020 20:11:09 +0530
Subject: [PATCH 1/2] added merge sort parallel

---
 .../linear_data_structures/__init__.py        |  8 ++-
 .../linear_data_structures/algorithms.py      | 76 +++++++++++++++++++
 .../linear_data_structures/arrays.py          | 10 ++--
 .../tests/test_algorithm.py                   | 33 ++++++++++
 4 files changed, 120 insertions(+), 7 deletions(-)
 create mode 100644 pydatastructs/linear_data_structures/algorithms.py
 create mode 100644 pydatastructs/linear_data_structures/tests/test_algorithm.py

diff --git a/pydatastructs/linear_data_structures/__init__.py b/pydatastructs/linear_data_structures/__init__.py
index a6e0d74b1..f5dcce94c 100644
--- a/pydatastructs/linear_data_structures/__init__.py
+++ b/pydatastructs/linear_data_structures/__init__.py
@@ -2,7 +2,8 @@
 
 from . import (
     arrays,
-    linked_lists
+    linked_lists,
+    algorithms
 )
 
 from .arrays import (
@@ -18,3 +19,8 @@
     DoublyCircularLinkedList
 )
 __all__.extend(linked_lists.__all__)
+
+from .algorithms import (
+    merge_sort_parallel
+)
+__all__.extend(algorithms.__all__)
diff --git a/pydatastructs/linear_data_structures/algorithms.py b/pydatastructs/linear_data_structures/algorithms.py
new file mode 100644
index 000000000..9a14fc05c
--- /dev/null
+++ b/pydatastructs/linear_data_structures/algorithms.py
@@ -0,0 +1,76 @@
+from pydatastructs.linear_data_structures.arrays import (
+    OneDimensionalArray, DynamicArray)
+from pydatastructs.utils.misc_util import _check_type
+from concurrent.futures import ThreadPoolExecutor
+from math import log, floor
+
+__all__ = [
+    'merge_sort_parallel'
+]
+
+def _merge(array, sl, el, sr, er, end):
+    """
+    Merges the sorted runs array[sl..el] and array[sr..er] in place.
+
+    Indices beyond ``end`` and ``None`` entries are skipped; the merged
+    values are written back contiguously from ``sl``, so any ``None``
+    slots end up at the tail of the merged range.
+    """
+    l, r = [], []
+    for i in range(sl, el + 1):
+        if (i <= end and
+            array[i] is not None):
+            l.append(array[i])
+            array[i] = None
+    for i in range(sr, er + 1):
+        if (i <= end and
+            array[i] is not None):
+            r.append(array[i])
+            array[i] = None
+    i, j, k = 0, 0, sl
+    while i < len(l) and j < len(r):
+        if l[i] <= r[j]:
+            array[k] = l[i]
+            i += 1
+        else:
+            array[k] = r[j]
+            j += 1
+        k += 1
+
+    while i < len(l):
+        array[k] = l[i]
+        i += 1
+        k += 1
+
+    while j < len(r):
+        array[k] = r[j]
+        j += 1
+        k += 1
+
+def merge_sort_parallel(array, num_threads, **kwargs):
+    start = kwargs.get('start', 0)
+    end = kwargs.get('end', array._size - 1)
+    for size in range(floor(log(end - start + 1, 2)) + 1):
+        pow_2 = 2**size
+        with ThreadPoolExecutor(max_workers=num_threads) as Executor:
+            # Queue every merge of this pass before waiting on any of
+            # them; each merge touches its own disjoint index range.
+            # Calling .result() right after each submit would execute
+            # the merges one at a time.
+            futures = []
+            i = start
+            while i <= end:
+                futures.append(Executor.submit(
+                    _merge,
+                    array,
+                    i, i + pow_2 - 1,
+                    i + pow_2, i + 2*pow_2 - 1,
+                    end))
+                i = i + 2*pow_2
+            # Finish the pass (and surface worker errors) before the
+            # next, wider pass starts.
+            for future in futures:
+                future.result()
+
+    if _check_type(array, DynamicArray):
+        array._modify(force=True)
diff --git a/pydatastructs/linear_data_structures/arrays.py b/pydatastructs/linear_data_structures/arrays.py
index 454d3b94d..130098b87 100644
--- a/pydatastructs/linear_data_structures/arrays.py
+++ b/pydatastructs/linear_data_structures/arrays.py
@@ -209,12 +209,12 @@ def __new__(cls, dtype=NoneType, *args, **kwargs):
         obj._last_pos_filled = obj._num - 1
         return obj
 
-    def _modify(self):
+    def _modify(self, force=False):
         """
         Contracts the array if Num(T)/Size(T) falls
         below load factor.
         """
-        if self._num/self._size < self._load_factor:
+        if (self._num/self._size < self._load_factor) or force:
             arr_new = OneDimensionalArray(self._dtype, 2*self._num + 1)
             j = 0
             for i in range(self._last_pos_filled + 1):
@@ -231,14 +231,12 @@ def append(self, el):
             for i in range(self._last_pos_filled + 1):
                 arr_new[i] = self[i]
             arr_new[self._last_pos_filled + 1] = el
-            self._last_pos_filled += 1
             self._size = arr_new._size
-            self._num += 1
             self._data = arr_new._data
         else:
             self[self._last_pos_filled + 1] = el
-            self._last_pos_filled += 1
-            self._num += 1
+        self._last_pos_filled += 1
+        self._num += 1
         self._modify()
 
     def delete(self, idx):
diff --git a/pydatastructs/linear_data_structures/tests/test_algorithm.py b/pydatastructs/linear_data_structures/tests/test_algorithm.py
new file mode 100644
index 000000000..d33f802c5
--- /dev/null
+++ b/pydatastructs/linear_data_structures/tests/test_algorithm.py
@@ -0,0 +1,33 @@
+from pydatastructs import (
+    merge_sort_parallel, DynamicOneDimensionalArray,
+    OneDimensionalArray)
+import random
+
+def test_merge_sort_parallel():
+
+    random.seed(1000)
+
+    n = random.randint(10, 20)
+    arr = DynamicOneDimensionalArray(int, 0)
+    for _ in range(n):
+        arr.append(random.randint(1, 1000))
+    for _ in range(n//3):
+        arr.delete(random.randint(0, n//2))
+    expected_arr = [686, 779, 102, 134, 362,
+                    448, 480, 548, 228, 688,
+                    247, 373, 696, None, None,
+                    None, None, None, None,
+                    None, None, None, None,
+                    None, None, None, None]
+    merge_sort_parallel(arr, 5, start=2, end=10)
+    assert arr._data == expected_arr
+
+    n = random.randint(10, 20)
+    arr = OneDimensionalArray(int, n)
+    for i in range(n):
+        arr[i] = random.randint(1, 1000)
+    expected_arr = [42, 695, 147, 500, 768,
+                    998, 473, 732, 728, 426,
+                    709, 910]
+    merge_sort_parallel(arr, 5, start=2, end=5)
+    assert arr._data == expected_arr

From d1a537da8c458148cc44f2cb9712bb47d5de0efe Mon Sep 17 00:00:00 2001
From: czgdp1807
Date: Thu, 19 Mar 2020 20:17:20 +0530
Subject: [PATCH 2/2] docs added

---
 .../linear_data_structures/algorithms.py      | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/pydatastructs/linear_data_structures/algorithms.py b/pydatastructs/linear_data_structures/algorithms.py
index 9a14fc05c..fa830dfc4 100644
--- a/pydatastructs/linear_data_structures/algorithms.py
+++ b/pydatastructs/linear_data_structures/algorithms.py
@@ -48,6 +48,41 @@ def _merge(array, sl, el, sr, er, end):
         k += 1
 
 def merge_sort_parallel(array, num_threads, **kwargs):
+    """
+    Implements parallel merge sort.
+
+    Parameters
+    ==========
+
+    array: Array
+        The array which is to be sorted.
+    num_threads: int
+        The maximum number of threads
+        to be used for sorting.
+    start: int
+        The starting index of the portion
+        which is to be sorted.
+        Optional, by default 0
+    end: int
+        The ending index of the portion which
+        is to be sorted.
+        Optional, by default the last index
+        of the array, i.e., ``array._size - 1``.
+
+    Examples
+    ========
+
+    >>> from pydatastructs import OneDimensionalArray, merge_sort_parallel
+    >>> arr = OneDimensionalArray(int, [3, 2, 1])
+    >>> merge_sort_parallel(arr, 3)
+    >>> [arr[0], arr[1], arr[2]]
+    [1, 2, 3]
+
+    References
+    ==========
+
+    .. [1] https://en.wikipedia.org/wiki/Merge_sort
+    """
     start = kwargs.get('start', 0)
     end = kwargs.get('end', array._size - 1)
     for size in range(floor(log(end - start + 1, 2)) + 1):