Skip to content

Commit

Permalink
Re-implement select_object_content implementation (#793)
Browse files Browse the repository at this point in the history
This change fixes multiple issues:

- handles unicode boundaries properly for special delimiters
- handles zero-payload 'Cont' event messages
- handles error messages properly
  • Loading branch information
harshavardhana authored and nitisht committed Sep 10, 2019
1 parent 84e57b6 commit 0625257
Show file tree
Hide file tree
Showing 15 changed files with 400 additions and 349 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,6 @@ var/
*.egg-info/
.installed.cfg
*.egg
*~
.#*
#*
3 changes: 1 addition & 2 deletions docs/API.md
Original file line number Diff line number Diff line change
Expand Up @@ -690,7 +690,6 @@ __Return Value__
|``obj``| _SelectObjectReader_ |Select_object_reader object. |



__Example__


Expand Down Expand Up @@ -736,7 +735,7 @@ try:
# Get the stats
print(data.stats())

except CRCValidationError as err:
except SelectCRCValidationError as err:
print(err)
except ResponseError as err:
print(err)
Expand Down
18 changes: 11 additions & 7 deletions examples/select_object_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@

from minio import Minio
from minio.error import ResponseError
from minio.select_object_reader import CRCValidationError
from minio.select_object_options import (SelectObjectOptions, CSVInput,
JSONInput, RequestProgress,
ParquetInput, InputSerialization,
OutputSerialization, CSVOutput,
JsonOutput)
from minio.select.errors import SelectCRCValidationError, SelectMessageError
from minio.select.options import (SelectObjectOptions, CSVInput,
JSONInput, RequestProgress,
ParquetInput, InputSerialization,
OutputSerialization, CSVOutput,
JsonOutput)

client = Minio('s3.amazonaws.com',
access_key='YOUR-ACCESSKEY',
Expand Down Expand Up @@ -71,7 +71,11 @@
# Get the stats
print(data.stats())

except CRCValidationError as err:
except SelectMessageError as err:
print(err)

except SelectCRCValidationError as err:
print(err)

except ResponseError as err:
print(err)
5 changes: 1 addition & 4 deletions minio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

__title__ = 'minio-py'
__author__ = 'MinIO, Inc.'
__version__ = '4.0.22'
__version__ = '5.0.0'
__license__ = 'Apache 2.0'
__copyright__ = 'Copyright 2015, 2016, 2017, 2018, 2019 MinIO, Inc.'

Expand All @@ -38,6 +38,3 @@
from .post_policy import PostPolicy
from .copy_conditions import CopyConditions
from .definitions import Bucket, Object
from .select_object_reader import SelectObjectReader


5 changes: 2 additions & 3 deletions minio/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,7 @@
is_valid_bucket_notification_config, is_valid_policy_type,
mkdir_p, dump_http, amzprefix_user_metadata,
is_supported_header,is_amz_header)
from .helpers import (MAX_MULTIPART_OBJECT_SIZE,
MAX_PART_SIZE,
from .helpers import (MAX_PART_SIZE,
MAX_POOL_SIZE,
MIN_PART_SIZE,
DEFAULT_PART_SIZE,
Expand All @@ -94,7 +93,7 @@
xml_marshal_select)
from .fold_case_dict import FoldCaseDict
from .thread_pool import ThreadPool
from .select_object_reader import SelectObjectReader
from .select import SelectObjectReader

# Comment format.
_COMMENTS = '({0}; {1})'
Expand Down
11 changes: 0 additions & 11 deletions minio/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,6 @@
MIN_PART_SIZE = 5 * 1024 * 1024 # 5MiB
DEFAULT_PART_SIZE = MIN_PART_SIZE # Currently its 5MiB


# Select Object Content
READ_SIZE_SELECT = 32 * 1024 # Buffer size
SQL = 'SQL' # Value for ExpressionType
EVENT_RECORDS = 'Records' # Event Type is Records
EVENT_PROGRESS = 'Progress' # Event Type Progress
EVENT_STATS = 'Stats' # Event Type Stats
EVENT = 'event' # Message Type is event
EVENT_END = 'End' # Event Type is End
ERROR = 'error' # Message Type is error

_VALID_BUCKETNAME_REGEX = re.compile('^[a-z0-9][a-z0-9\\.\\-]+[a-z0-9]$')
_ALLOWED_HOSTNAME_REGEX = re.compile(
'^((?!-)(?!_)[A-Z_\\d-]{1,63}(?<!-)(?<!_)\\.)*((?!_)(?!-)[A-Z_\\d-]{1,63}(?<!-)(?<!_))$',
Expand Down
31 changes: 31 additions & 0 deletions minio/select/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# -*- coding: utf-8 -*-
# MinIO Python Library for Amazon S3 Compatible Cloud Storage,
# (C) 2019 MinIO, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
:copyright: (c) 2019 by MinIO, Inc.
:license: Apache 2.0, see LICENSE for more details.
"""

__title__ = 'minio-py'
__author__ = 'MinIO, Inc.'
__version__ = '0.0.1'
__license__ = 'Apache 2.0'
__copyright__ = 'Copyright 2019 MinIO, Inc.'

from .reader import *
from .helpers import *
from .errors import *
from .options import *
36 changes: 36 additions & 0 deletions minio/select/errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# -*- coding: utf-8 -*-
# MinIO Python Library for Amazon S3 Compatible Cloud Storage, (C)
# 2019 MinIO, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
minio.select.errors
~~~~~~~~~~~~~~~~~~~
This module implements the error classes for SelectObject responses.
:copyright: (c) 2019 by MinIO, Inc.
:license: Apache 2.0, see LICENSE for more details.
"""

class SelectMessageError(Exception):
    '''
    Raised when a SelectObject response message has message type 'error'.
    '''

class SelectCRCValidationError(Exception):
    '''
    Raised when a CRC check on a SelectObject response does not match.
    '''
61 changes: 61 additions & 0 deletions minio/select/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
# MinIO Python Library for Amazon S3 Compatible Cloud Storage, (C)
# 2019 MinIO, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
minio.select.helpers
~~~~~~~~~~~~~~~~~~~~
This module implements the helper functions for SelectObject responses.
:copyright: (c) 2019 by MinIO, Inc.
:license: Apache 2.0, see LICENSE for more details.
"""

import codecs
from binascii import crc32

# String constants for SelectObject: the request expression type and the
# message/event type names and content type.
SQL = 'SQL' # Value for ExpressionType
EVENT_RECORDS = 'Records' # Event type: Records
EVENT_PROGRESS = 'Progress' # Event type: Progress
EVENT_STATS = 'Stats' # Event type: Stats
EVENT_CONT = 'Cont' # Event type: Cont (continue)
EVENT_END = 'End' # Event type: End
EVENT_CONTENT_TYPE = "text/xml" # Event content type: XML
EVENT = 'event' # Message type: event
ERROR = 'error' # Message type: error

def calculate_crc(value):
    '''
    Return the CRC-32 checksum of ``value`` as an unsigned 32-bit integer.

    :param value: Bytes-like data to checksum.
    :return: Integer checksum in the range 0..0xffffffff.
    '''
    # Mask to 32 bits so the result is always a non-negative value,
    # independent of how the interpreter's crc32 reports its result.
    return crc32(value) & 0xffffffff

def validate_crc(current_value, expected_value):
    '''
    Check that the CRC of ``current_value`` matches ``expected_value``.

    :param current_value: Data whose CRC is computed with calculate_crc.
    :param expected_value: Bytes holding the expected CRC; they are
        converted to an integer with byte_int before comparison.
    :return: True when both CRC values are equal, False otherwise.
    '''
    return calculate_crc(current_value) == byte_int(expected_value)

def byte_int(data_bytes):
    '''
    Convert bytes to a big-endian integer.

    :param data_bytes: Bytes to interpret, most significant byte first.
    :return: The integer value of ``data_bytes``.
    :raises ValueError: If ``data_bytes`` is empty, because the empty
        hex string cannot be parsed as an integer.
    '''
    # Hex-encode the bytes, then parse the hex digits as a base-16 number.
    return int(codecs.encode(data_bytes, 'hex'), 16)
7 changes: 4 additions & 3 deletions minio/select_object_options.py → minio/select/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,17 @@
# limitations under the License.

"""
minio.select.options
~~~~~~~~~~~~~~~
This module creates the request for Select
This module implements the SelectOption definition for SelectObject API.
:copyright: (c) 2019 by MinIO, Inc.
:license: Apache 2.0, see LICENSE for more details.
"""
from .helpers import (SQL)

from .helpers import (SQL)

class CSVInput:
"""
Expand All @@ -41,7 +43,6 @@ def __init__(self, FileHeaderInfo=None, RecordDelimiter="\n",
self.Comments = Comments
self.AllowQuotedRecordDelimiter = AllowQuotedRecordDelimiter


class JSONInput:
"""
JSONInput: Input format as JSON.
Expand Down
Loading

0 comments on commit 0625257

Please sign in to comment.