Skip to content

Commit

Permalink
Added globalization unit tests for various charset including GB18030 (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
Gene Lee authored Sep 16, 2019
1 parent 0db7f76 commit a43215b
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 26 deletions.
2 changes: 2 additions & 0 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ def unit_test():
'mssqlcli/jsonrpc/tests '
'mssqlcli/jsonrpc/contracts/tests '
'tests/test_telemetry.py '
'tests/test_localization.py '
'tests/test_globalization.py '
'tests/test_special.py'.format(runid, python_version),
utility.ROOT_DIR,
continue_on_error=False)
Expand Down
162 changes: 162 additions & 0 deletions tests/test_globalization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
# coding=utf-8
import unittest
import socket
from utility import random_str, decode
from mssqltestutils import create_mssql_cli_client, shutdown


# All tests require a live connection to test server.
# Make modifications to mssqltestutils.create_mssql_cli_client() to use a different server and database.
# Please Note: These tests cannot be run offline.
class GlobalizationResultSetTests(unittest.TestCase):

def test_charset_double(self):
charset_list = self.get_charset_double()
self.run_charset_validation(charset_list)


def test_charset_four(self):
charset_list = self.get_charset_four()
self.run_charset_validation(charset_list)


def test_charset_mongolian(self):
charset_list = self.get_charset_mongolian()
self.run_charset_validation(charset_list)


def test_charset_uyghur(self):
charset_list = self.get_charset_uyghur()
self.run_charset_validation(charset_list)


def test_charset_tibetian(self):
charset_list = self.get_charset_tibetian()
self.run_charset_validation(charset_list)


def test_charset_yi(self):
charset_list = self.get_charset_yi()
self.run_charset_validation(charset_list)


def test_charset_zang(self):
charset_list = self.get_charset_zang()
self.run_charset_validation(charset_list)


def run_charset_validation(self, charset_list):
    """
    Round-trip every character of each charset through the test server.

    Each string in ``charset_list`` is cut into chunks of at most 50
    characters. Every chunk is embedded in a temp-table name, in two column
    names and in two inserted nvarchar values. The rows are then selected
    back and the decoded string values are compared with what was inserted.

    Only the returned row values are compared; the column names carry the
    test characters (so the statements must execute without error) but the
    returned column metadata is not asserted on.

    :param charset_list: iterable of unicode strings to validate.
    """
    max_string_length = 50
    local_machine_name = socket.gethostname().replace('-', '_').replace('.', '_')

    # Create the client before entering the try block: if creation fails
    # there is nothing to shut down, and the finally clause must not hide the
    # real failure behind an unbound ``client`` name.
    client = create_mssql_cli_client()
    try:
        for charset in charset_list:
            # Walk the charset in fixed-size windows; the final window may be
            # shorter than max_string_length.
            for start in range(0, len(charset), max_string_length):
                test_str = charset[start:start + max_string_length]
                col1_name = u'col_{0}1'.format(test_str)
                col2_name = u'col_{0}2'.format(test_str)
                table_name = u'#mssqlcli_{0}_{1}_{2}'.format(
                    local_machine_name, random_str(), test_str)

                setup_query = (
                    u"CREATE TABLE {0} ({1} nvarchar(MAX), {2} int);"
                    u"INSERT INTO {0} VALUES (N'value_{3}1', 1);"
                    u"INSERT INTO {0} VALUES (N'value_{3}2', 2);"
                ).format(table_name, col1_name, col2_name, test_str)
                for _, _, _, _, is_error in client.execute_query(setup_query):
                    self.assertFalse(is_error)

                select_query = u"SELECT {0}, {1} FROM {2};".format(
                    col1_name, col2_name, table_name)
                for rows, _, _, _, is_error in client.execute_query(select_query):
                    self.assertFalse(is_error)
                    self.assertEqual(len(rows), 2)
                    # The values read back must match what was inserted.
                    self.assertEqual(decode(rows[0][0]), u'value_{0}1'.format(test_str))
                    self.assertEqual(decode(rows[1][0]), u'value_{0}2'.format(test_str))
    finally:
        shutdown(client)


# Return the test characters for test_charset_double as a one-element list.
# The adjacent u'...' literals below have no commas between them, so Python
# joins them into a single string; the list holds one long string, not one
# entry per source line.
# Contents, in order: ASCII letters and digits, hiragana, katakana, Greek,
# Cyrillic, Latin letters with pinyin tone marks, bopomofo, CJK ideographs
# and the Suzhou numerals.
# NOTE(review): "double" presumably refers to two-byte GB18030 codes - confirm.
def get_charset_double(self):
return [
u'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
u'abcdefghijklmnopqrstuvwxyz'
u'0123456789'
u'ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとど'
u'なにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをん'
u'ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニ'
u'ヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶ'
u'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩαβγδεζηθικλμνξοπρστυφχψω'
u'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя'
u'āáǎàēéěèīíǐìōóǒòūúǔùǖǘǚǜüêɑńňǹɡ'
u'ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦㄧㄨㄩ'
u'啊阿埃挨哎唉哀皑癌蔼矮艾碍爱隘薄雹保堡饱宝抱报暴豹鲍爆杯碑悲病并玻菠播拨钵波博勃搏铂箔伯帛场尝常长偿肠厂敞畅唱倡超抄'
u'钞朝础储矗搐触处揣川穿椽传船喘串疮怠耽担丹单郸掸胆旦氮但惮淡诞弹丁盯叮钉顶鼎锭定订丢东冬董懂动贰发罚筏伐乏阀法珐藩'
u'丂丄丅丆丏丒丗丟丠両丣並丩丮丯丱侤侫侭侰侱侲侳侴侶侷侸侹侺侻侼侽傽傾傿僀僁僂僃僄僅僆僇僈僉僊僋僌凘凙凚凜凞凟凢凣凥処'
u'凧凨凩凪凬凮匑匒匓匔匘匛匜匞匟匢匤匥匧匨匩匫咢咥咮咰咲咵咶咷咹咺咼咾哃哅哊哋嘆嘇嘊嘋嘍嘐嘑嘒嘓嘔嘕嘖嘗嘙嘚嘜園圓圔圕'
u'獲獳獴獵獶獷獸獹獺獻獼獽獿玀玁玂珸珹珺珻珼珽現珿琀琁琂琄琇琈琋琌瑻瑼瑽瑿璂璄璅璆璈璉璊璌璍璏璑璒瓳瓵瓸瓹瓺瓻瓼瓽瓾甀'
u'甁甂甃甅甆甇疈疉疊疌疍疎疐疓疕疘疛疜疞疢疦疧癅癆癇癈癉癊癋癎癏癐癑癒癓癕癗癘盄盇盉盋盌盓盕盙盚盜盝盞盠盡盢監睝睞睟睠'
u'〡〢〣〤〥〦〧〨〩'
]


# Return the test characters for test_charset_four as a one-element list.
# The adjacent u'...' literals have no commas between them, so they are
# joined into a single string of CJK ideographs.
# NOTE(review): "four" presumably refers to four-byte GB18030 codes - confirm.
def get_charset_four(self):
return [
u'㐀㐁㐂㐃㐄㐅㐆㐇㐈㐉㐊㐋㐌㐍㐎㐏㐐㐑㐒㐓㐔㐕㐖㐗㐘㐙㐚㐛㐜㐝㐞㐟㐠㐡㐢㐣㐤㐥㐦㐧㐨㐩㐪㐫㐬㐭㐮㐯㐰㐱㐲㐳㐴㐵㐶㐷㐸㐹'
u'㐺㐻㐼㐽㐾㐿㑀㑁㑂㑃㑄㑅㑆㑈㑉㑊㑋㑌㑍㑎㑏㑐㑑㑒㑓㑔㑕㑖㑗㑘㑙㑚㑛㑜㑝㑞㑟㑠㑡㑢㑣㑤㑥㑦㑧㑨㑩㑪㑫㑬㑭㑮㑯㑰㑱㑲㑴㑵'
u'㑶㑷㑸㑹㑺㑻㑼㑽㑾㑿㒀㒁㒂㒃㒄㒅㒆㒇㒈㒉㒊㒋㒌㒍㒎㒏㒐㒑㒒㒓㒔㒕㒖㒗㒘㒙㒚㒛㒜㒝㒞㒟㒠㒡㒢㒣㒤㒥㒦㒧㒨㒩㒪㒫㒬㒭㒮㒯'
u'㒰㒱㒲㒳㒴㒵㒶㒷㒸㒹㒺㒻㒼㒽㒾㒿㓀㓁㓂㓃㓄㓅㓆㓇㓈㓉㓊㓋㓌㓍㓎㓏㓐㓑㓒㓓㓔㓕㓖㓗㓘㓙㓚㓛㓜㓝㓞㓟㓠㓡㓢㓣㓤㓥㓦㓧㓨㓩'
u'㓪㓫㓬㓭㓮㓯㓰㓱㓲㓳㓴㓵㓶㓷㓸㓹㓺㓻㓼㓽㓾㓿㔀㔁㔂㔃㔄㔅㔆㔇㔈㔉㔊㔋㔌㔍㔎㔏㔐㔑㔒㔓㔔㔕㔖㔗㔘㔙㔚㔛㔜㔝㔞㔟㔠㔡㔢㔣'
u'㔤㔥㔦㔧㔨㔩㔪㔫㔬㔭㔮㔯㔰㔱㔲㔳㔴㔵㔶㔷㔸㔹㔺㔻㔼㔽㔾㔿㕀㕁㕂㕃㕄㕅㕆㕇㕈㕉㕊㕋㕌㕍㕎㕏㕐㕑㕒㕓㕔㕕㕖㕗㕘㕙㕚㕛㕜㕝'
]


# Return the test characters for test_charset_mongolian: a one-element list
# holding a single string of Mongolian-script digits and letters.
def get_charset_mongolian(self):
return [
u'᠐᠑᠒᠓᠔᠕᠖᠗᠘᠙ᠠᠡᠢᠣᠤᠥᠦᠧᠨᠩᠪᠫᠬᠭᠮᠯᠰᠱᠲᠳᠴᠵᠶᠷᠸᠹᠺᠻᠼᠽᠾᠿᡀᡁᡂᡃᡄᡅᡆᡇᡈᡉᡊᡋᡌᡍᡎᡏᡐᡑᡒᡓᡔᡕᡖᡗᡘᡙᡚᡛᡜᡝᡞᡟᡠᡡᡢᡣᡤᡥᡦᡧᡨᡩᡪᡫᡬᡭᡮᡯᡰᡱᡲᡳᡴᡵᡶᡷᢀᢁᢂᢃᢄᢅᢆᢇᢈᢉᢊᢋᢌᢍᢎᢏᢐᢑᢒᢓᢔᢕᢖᢗᢘᢙᢚᢛᢜᢝᢞᢟᢠᢡᢢᢣᢤᢥᢦᢧᢨᢩ'
]


# Return the test characters for test_charset_uyghur as a one-element list.
# The adjacent u'...' literals have no commas between them, so they are
# joined into a single string made up mostly of Arabic-script letters
# (both presentation forms and basic letters).
def get_charset_uyghur(self):
return [
u'ﺏﺐﺑﺒﺓﺔﺕﺖﺗﺘﺙﺚﺛﺜﺝﺞﺟﺠﺡﺢﺣﺤﺥﺦﺧﺨﺩﺪﺫﺬﺭﺮﺯﺰﺱﺲﺳﺴﺵﺶﺷﺸﺹﺺﺻﺼﺽﺾﺿﻀﻁﻂﻃﻄﻅﻆﻇﻈﻉﻊﻋﻌﻍﻎﻏﻐﻑﻒﻓﻔﻕﻖﻗﻘﻙﻚﻛﻜﻝﻞﻟﻠﻡﻢﻣﻤﻥﻦﻧﻨﻩﻪﻫﻬﻭﻮﻯﻰﻱ'
u'ﮀﮁﮂﮃﮄﮅﮆﮇﮈﮉﮊﮋﮌﮍﮎﮏﮐﮑﮒﮓﮔﮕﮖﮗﮘﮙﮚﮛﮜﮝﮞﮟﮠﮡﮢﮣﮤﮥﮦﮧﮨﮩﮪﮫﮬﮭﮮﮯﮰﮱﯚﯛﯜﯝﯞﯟﯠﯡﯢﯣﯤﯥﯦﯧﯨﯩﯪﯫﯬﯭﯮﯯﯰﯱﯲﯳﯴﯵﯶﯷﯸﯹﯺﯻﯼﯽﯾﯿﺄﺅﺆﺇﺈﺉﺊﺋﺌﺍﺎ'
u'ڡڢڣڤڥڦڧڨکڪګڬڭڮگڰڱڲڳڴڵڶڷڸڹںڻڼڽھڿۀہۂۃۄۅۆۇۈۉۊۋیۍێۏېۑפֿﭏﭐﭑﭒﭓﭔﭕﭖﭗﭘﭙﭚﭛﭜﭝﭞﭟﭠﭡﭢﭣﭤﭥﭦﭧﭨﭩﭪﭫﭬﭭﭮﭯﭰﭱﭲﭳﭴﭵﭶﭷﭸﭹﭺﭻﭼﭽﭾﭿ'
u'بةتثجحخدذرزسشصضٻټٽپٿڀځڂڃڄڅچڇڈډڊڋڌڍڎڏڐڑڒړڔڕږڗژڙښڛڜڝڞڟڠ'
]


# Return the test characters for test_charset_tibetian: a one-element list
# holding a single string of Tibetan-script characters.
def get_charset_tibetian(self):
return [
u'ༀཀཁགགྷངཅཆཇཉཊཋཌཌྷཎཏཐདདྷནཔཕབབྷམཙཚཛཛྷཝཞཟའཡརལཤཥསཧཨཀྵཪྈྉྊྋ'
]


# Return the test characters for test_charset_yi as a one-element list.
# The adjacent u'...' literals have no commas between them, so they are
# joined into a single string of Yi syllables.
def get_charset_yi(self):
return [
u'ꀀꀁꀂꀃꀄꀅꀆꀇꀈꀉꀊꀋꀌꀍꀎꀏꀐꀑꀒꀓꀔꀕꀖꀗꀘꀙꀚꀛꀜꀝꀞꀟꀠꀡꀢꀣꀤꀥꀦꀧꀨꀩꀪꀫꀬꀭꀮꀯꀰꀱꀲꀳꀴꀵꀶꀷꀸꀹꀺꀻꀼꀽꀾꀿꁀꁁꁂꁃꁄꁅꁆꁇꁈꁉꁊꁋꁌꁍꁎꁏꁐꁑꁒꁓꁔꁕꁖ'
u'ꁗꁘꁙꁚꁛꁜꁝꁞꁟꁠꁡꁢꁣꁤꁥꁦꁧꁨꁩꁪꁫꁬꁭꁮꁯꁰꁱꁲꁳꁴꁵꁶꁷꁸꁹꁺꁻꁼꁽꁾꁿꂀꂁꂂꂃꂄꂅꂆꂇꂈꂉꂊꂋꂌꂍꂎꂏꂐꂑꂒꂓꂔꂕꂖꂗꂘꂙꂚꂛꂜꂝꂞꂟꂠꂡꂢꂣꂤꂥꂦꂧꂨꂩꂪꂫꂬꂭ'
u'ꂮꂯꂰꂱꂲꂳꂴꂵꂶꂷꂸꂹꂺꂻꂼꂽꂾꂿꃀꃁꃂꃃꃄꃅꃆꃇꃈꃉꃊꃋꃌꃍꃎꃏꃐꃑꃒꃓꃔꃕꃖꃗꃘꃙꃚꃛꃜꃝꃞꃟꃠꃡꃢꃣꃤꃥꃦꃧꃨꃩꃪꃫꃬꃭꃮꃯꃰꃱꃲꃳꃴꃵꃶꃷꃸꃹꃺꃻꃼꃽꃾꃿꄀꄁꄂꄃꄄ'
u'ꄅꄆꄇꄈꄉꄊꄋꄌꄍꄎꄏꄐꄑꄒꄓꄔꄕꄖꄗꄘꄙꄚꄛꄜꄝꄞꄟꄠꄡꄢꄣꄤꄥꄦꄧꄨꄩꄪꄫꄬꄭꄮꄯꄰꄱꄲꄳꄴꄵꄶꄷꄸꄹꄺꄻꄼꄽꄾꄿꅀꅁꅂꅃꅄꅅꅆꅇꅈꅉꅊꅋꅌꅍꅎꅏꅐꅑꅒꅓꅔꅕꅖꅗꅘꅙꅚꅛ'
u'ꅜꅝꅞꅟꅠꅡꅢꅣꅤꅥꅦꅧꅨꅩꅪꅫꅬꅭꅮꅯꅰꅱꅲꅳꅴꅵꅶꅷꅸꅹꅺꅻꅼꅽꅾꅿꆀꆁꆂꆃꆄꆅꆆꆇꆈꆉꆊꆋꆌꆍꆎꆏꆐꆑꆒꆓꆔꆕꆖꆗꆘꆙꆚꆛꆜꆝꆞꆟꆠꆡꆢꆣꆤꆥꆦꆧꆨꆩꆪꆫꆬꆭꆮꆯꆰꆱꆲ'
u'ꆳꆴꆵꆶꆷꆸꆹꆺꆻꆼꆽꆾꆿꇀꇁꇂꇃꇄꇅꇆꇇꇈꇉꇊꇋꇌꇍꇎꇏꇐꇑꇒꇓꇔꇕꇖꇗꇘꇙꇚꇛꇜꇝꇞꇟꇠꇡꇢꇣꇤꇥꇦꇧꇨꇩꇪꇫꇬꇭꇮꇯꇰꇱꇲꇳꇴꇵꇶꇷꇸꇹꇺꇻꇼꇽꇾꇿꈀꈁꈂꈃꈄꈅꈆꈇꈈꈉ'
]


# Return the test characters for test_charset_zang as a one-element list.
# The adjacent u'...' literals have no commas between them, so they are
# joined into a single string.
# NOTE(review): the characters below are Yi syllables and partly overlap the
# data in get_charset_yi; "zang" usually denotes Tibetan, so confirm that
# this is the intended data set.
def get_charset_zang(self):
return [
u'ꃙꃚꃛꃜꃝꃞꃟꃠꃡꃢꃣꃤꃥꃦꃧꃨꃩꃪꃫꃬꃭꃮꃯꃰꃱꃲꃳꃴꃵꃶꃷꃸꃹꃺꃻꃼꃽꃾꃿꄀꄁꄂꄃꄄꄅꄆꄇꄈꄉꄊꄋꄌꄍꄎꄏꄐꄑꄒꄓꄔꄕꄖꄗꄘꄙꄚꄛꄜꄝꄞꄟꄠꄡꄢꄣꄤꄥꄦꄧꄨꄩꄪꄫꄬꄭꄮꄯ'
u'ꄰꄱꄲꄳꄴꄵꄶꄷꄸꄹꄺꄻꄼꄽꄾꄿꅀꅁꅂꅃꅄꅅꅆꅇꅈꅉꅊꅋꅌꅍꅎꅏꅐꅑꅒꅓꅔꅕꅖꅗꅘꅙꅚꅛꅜꅝꅞꅟꅠꅡꅢꅣꅤꅥꅦꅧꅨꅩꅪꅫꅬꅭꅮꅯꅰꅱꅲꅳꅴꅵꅶꅷꅸꅹꅺꅻꅼꅽꅾꅿꆀꆁꆂꆃꆄꆅꆆ'
u'ꆇꆈꆉꆊꆋꆌꆍꆎꆏꆐꆑꆒꆓꆔꆕꆖꆗꆘꆙꆚꆛꆜꆝꆞꆟꆠꆡꆢꆣꆤꆥꆦꆧꆨꆩꆪꆫꆬꆭꆮꆯꆰꆱꆲꆳꆴꆵꆶꆷꆸꆹꆺꆻꆼꆽꆾꆿꇀꇁꇂꇃꇄꇅꇆꇇꇈꇉꇊꇋꇌꇍꇎꇏꇐꇑꇒꇓꇔꇕꇖꇗꇘꇙꇚꇛꇜꇝ'
u'ꇞꇟꇠꇡꇢꇣꇤꇥꇦꇧꇨꇩꇪꇫꇬꇭꇮꇯꇰꇱꇲꇳꇴꇵꇶꇷꇸꇹꇺꇻꇼꇽꇾꇿꈀꈁꈂꈃꈄꈅꈆꈇꈈꈉꈊꈋꈌꈍꈎꈏꈐꈑꈒꈓꈔꈕꈖꈗꈘꈙꈚꈛꈜꈝꈞꈟꈠꈡꈢꈣꈤꈥꈦꈧꈨꈩꈪꈫꈬꈭꈮꈯꈰꈱꈲꈳꈴ'
u'ꈵꈶꈷꈸꈹꈺꈻꈼꈽꈾꈿꉀꉁꉂꉃꉄꉅꉆꉇꉈꉉꉊꉋꉌꉍꉎꉏꉐꉑꉒꉓꉔꉕꉖꉗꉘꉙꉚꉛꉜꉝꉞꉟꉠꉡꉢꉣꉤ'
]
26 changes: 0 additions & 26 deletions tests/test_mssqlcliclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,32 +204,6 @@ def test_stored_proc_multiple_result_sets(self):
finally:
shutdown(client)


def test_select_result_unicode_encoded(self):
    """
    Verify the column names and string values in rows returned by select statement are properly encoded as unicode.

    A temp table with Korean column names is created, two rows with Korean
    string values are inserted, and the decoded column names and row values
    returned by ``SELECT *`` are compared with the originals.
    """
    local_machine_name = socket.gethostname().replace('-', '_').replace('.', '_')
    table_name = '#mssqlcli_{0}_{1}'.format(local_machine_name, random_str())

    # Create the client before entering the try block: if creation fails
    # there is nothing to shut down, and the finally clause must not hide the
    # real failure behind an unbound ``client`` name.
    client = create_mssql_cli_client()
    try:
        setup_query = (
            u"CREATE TABLE {0} (컬럼1 nvarchar(MAX), 컬럼2 int);"
            u"INSERT INTO {0} VALUES (N'테스트1', 1);"
            u"INSERT INTO {0} VALUES (N'테스트2', 2);"
        ).format(table_name)
        for _, _, _, _, is_error in client.execute_query(setup_query):
            assert not is_error

        select_query = u"SELECT * FROM {0};".format(table_name)
        for rows, columns, _, _, is_error in client.execute_query(select_query):
            assert not is_error
            assert decode(columns[0]) == u'컬럼1'
            assert decode(columns[1]) == u'컬럼2'
            assert decode(rows[0][0]) == u'테스트1'
            assert decode(rows[1][0]) == u'테스트2'
    finally:
        shutdown(client)


# When this module is executed as a script, hand control to unittest's
# command-line runner.
if __name__ == u'__main__':
unittest.main()

0 comments on commit a43215b

Please sign in to comment.