Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Bugfix] Handle all-caps address QR data; refactor/cleanup; more tests #667

Merged
merged 5 commits into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 57 additions & 47 deletions src/seedsigner/models/decode_qr.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,8 +507,7 @@ def base43_decode(s):
def is_bitcoin_address(s):
if re.search(r'^bitcoin\:.*', s, re.IGNORECASE):
return True
elif re.search(r'^((bc1|tb1|bcr|[123]|[mn])[a-zA-HJ-NP-Z0-9]{25,62})$', s):
# TODO: Handle regtest bcrt?
elif re.search(r'^((bc1|tb1|bcr|[123]|[mn])[a-zA-HJ-NP-Z0-9]{25,62})$', s, re.IGNORECASE):
return True
else:
return False
Expand Down Expand Up @@ -940,61 +939,72 @@ def __init__(self):


def add(self, segment, qr_type=QRType.BITCOIN_ADDRESS):
r = re.search(r'((bc1q|tb1q|bcrt1q|bc1p|tb1p|bcrt1p|[123]|[mn])[a-zA-HJ-NP-Z0-9]{25,64})', segment)
if r != None:
self.address = r.group(1)

if re.search(r'^((bc1q|tb1q|bcrt1q|bc1p|tb1p|bcrt1p|[123]|[mn])[a-zA-HJ-NP-Z0-9]{25,64})$', self.address) != None:
self.complete = True
self.collected_segments = 1

# get address type
r = re.search(r'^((bc1q|tb1q|bcrt1q|bc1p|tb1p|bcrt1p|[123]|[mn])[a-zA-HJ-NP-Z0-9]{25,64})$', self.address)
if r != None:
r = r.group(2)

if r == "1":
# Legacy P2PKH. mainnet
self.address_type = (SettingsConstants.LEGACY_P2PKH, SettingsConstants.MAINNET)
"""
Input may be prefixed with "bitcoin:" but will be ignored.

RegEx searches for a recognizable bitcoin address.
* The `^` ensures that the specified address prefixes can only match at
the beginning of the address.

Result will yield the following match groups:
* group 1: complete address
* group 2: address prefix
"""
address_match = re.search(r'^((bc1q|tb1q|bcrt1q|bc1p|tb1p|bcrt1p|[123]|[mn])[a-zA-HJ-NP-Z0-9]{25,64})', segment.split(":")[-1], re.IGNORECASE)
if address_match != None:
self.address = address_match.group(1)
self.complete = True
self.collected_segments = 1

# Have to handle wallets that uppercase bech32 addresses.
# Note that it's safe to lowercase the prefix for ALL addr formats.
addr_prefix = address_match.group(2).lower()

if addr_prefix == "1":
# Legacy P2PKH. mainnet
self.address_type = (SettingsConstants.LEGACY_P2PKH, SettingsConstants.MAINNET)

elif r == "m" or r == "n":
self.address_type = (SettingsConstants.LEGACY_P2PKH, SettingsConstants.TESTNET)
elif addr_prefix in ["m", "n"]:
self.address_type = (SettingsConstants.LEGACY_P2PKH, SettingsConstants.TESTNET)

elif r == "3":
# Nested segwit single sig (p2sh-p2wpkh), nested segwit multisig (p2sh-p2wsh), or legacy multisig (p2sh); mainnet
# TODO: Would be more correct to use a P2SH constant
self.address_type = (SettingsConstants.NESTED_SEGWIT, SettingsConstants.MAINNET)
elif addr_prefix == "3":
# Nested segwit single sig (p2sh-p2wpkh), nested segwit multisig (p2sh-p2wsh), or legacy multisig (p2sh); mainnet
# TODO: Would be more correct to use a P2SH constant
self.address_type = (SettingsConstants.NESTED_SEGWIT, SettingsConstants.MAINNET)

elif r == "2":
# Nested segwit single sig (p2sh-p2wpkh), nested segwit multisig (p2sh-p2wsh), or legacy multisig (p2sh); testnet / regtest
self.address_type = (SettingsConstants.NESTED_SEGWIT, SettingsConstants.TESTNET)
elif addr_prefix == "2":
# Nested segwit single sig (p2sh-p2wpkh), nested segwit multisig (p2sh-p2wsh), or legacy multisig (p2sh); testnet / regtest
self.address_type = (SettingsConstants.NESTED_SEGWIT, SettingsConstants.TESTNET)

elif r == "bc1q":
# Native Segwit (single sig or multisig), mainnet
self.address_type = (SettingsConstants.NATIVE_SEGWIT, SettingsConstants.MAINNET)
elif addr_prefix == "bc1q":
# Native Segwit (single sig or multisig), mainnet
self.address_type = (SettingsConstants.NATIVE_SEGWIT, SettingsConstants.MAINNET)

elif r == "tb1q":
# Native Segwit (single sig or multisig), testnet
self.address_type = (SettingsConstants.NATIVE_SEGWIT, SettingsConstants.TESTNET)
elif addr_prefix == "tb1q":
# Native Segwit (single sig or multisig), testnet
self.address_type = (SettingsConstants.NATIVE_SEGWIT, SettingsConstants.TESTNET)

elif r == "bcrt1q":
# Native Segwit (single sig or multisig), regtest
self.address_type = (SettingsConstants.NATIVE_SEGWIT, SettingsConstants.REGTEST)
elif addr_prefix == "bcrt1q":
# Native Segwit (single sig or multisig), regtest
self.address_type = (SettingsConstants.NATIVE_SEGWIT, SettingsConstants.REGTEST)

elif r == "bc1p":
# Native Segwit (single sig or multisig), mainnet
self.address_type = (SettingsConstants.TAPROOT, SettingsConstants.MAINNET)
elif addr_prefix == "bc1p":
self.address_type = (SettingsConstants.TAPROOT, SettingsConstants.MAINNET)

elif r == "tb1p":
# Native Segwit (single sig or multisig), testnet
self.address_type = (SettingsConstants.TAPROOT, SettingsConstants.TESTNET)
elif addr_prefix == "tb1p":
self.address_type = (SettingsConstants.TAPROOT, SettingsConstants.TESTNET)

elif r == "bcrt1p":
# Native Segwit (single sig or multisig), regtest
self.address_type = (SettingsConstants.TAPROOT, SettingsConstants.REGTEST)

return DecodeQRStatus.COMPLETE
elif addr_prefix == "bcrt1p":
self.address_type = (SettingsConstants.TAPROOT, SettingsConstants.REGTEST)
# Note: there is no final "else" here because the regex won't return any other matches.

# If the addr type is case-insensitive, ensure we return it lowercase
if self.address_type[0] in [SettingsConstants.NATIVE_SEGWIT, SettingsConstants.TAPROOT]:
self.address = self.address.lower()

return DecodeQRStatus.COMPLETE

logger.debug(f"Invalid address: {segment}")
return DecodeQRStatus.INVALID


Expand Down
176 changes: 114 additions & 62 deletions tests/test_decodepsbtqr.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,69 +281,121 @@ def test_short_4_letter_mnemonic_qr():
assert d.get_seed_phrase() == ["height", "demise", "useless", "trap", "grow", "lion", "found", "off", "key", "clown", "transfer", "enroll"]


def test_bitcoin_address():
bad1 = "loremipsum"
bad2 = "0xde0b295669a9fd93d5f28d9ec85e40f4cb697bae"
bad3 = "121802020768124106400009195602431595117715840445"

legacy_address1 = "1KFHE7w8BhaENAswwryaoccDb6qcT6DbYY"
legacy_address2 = "16ftSEQ4ctQFDtVZiUBusQUjRrGhM3JYwe"

main_bech32_address = "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq"
test_bech32_address = "tb1qkurj377gtlmu0j5flcykcsh2xagexh9h3jk06a"

main_nested_segwit_address = "3Nu78Cqcf6hsD4sUBAN9nP13tYiHU9QPFX"
test_nested_segwit_address = "2N6JbrvPMMwbBhu2KxqXyyHUQz3XKspvyfm"

main_bech32_address2 = "bitcoin:bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq?amount=12000"
main_bech32_address3 = "BITCOIN:bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq?junk"

d = DecodeQR()
d.add_data(bad1)

assert d.qr_type == QRType.INVALID

d = DecodeQR()
d.add_data(legacy_address1)

assert d.get_address() == legacy_address1
assert d.get_address_type() == (SettingsConstants.LEGACY_P2PKH, SettingsConstants.MAINNET)

d = DecodeQR()
d.add_data(legacy_address2)

assert d.get_address() == legacy_address2
assert d.get_address_type() == (SettingsConstants.LEGACY_P2PKH, SettingsConstants.MAINNET)

d = DecodeQR()
d.add_data(main_bech32_address)

assert d.get_address() == main_bech32_address
assert d.get_address_type() == (SettingsConstants.NATIVE_SEGWIT, SettingsConstants.MAINNET)

d = DecodeQR()
d.add_data(test_bech32_address)

assert d.get_address() == test_bech32_address
assert d.get_address_type() == (SettingsConstants.NATIVE_SEGWIT, SettingsConstants.TESTNET)

d = DecodeQR()
d.add_data(main_nested_segwit_address)

assert d.get_address() == main_nested_segwit_address
assert d.get_address_type() == (SettingsConstants.NESTED_SEGWIT, SettingsConstants.MAINNET)

d = DecodeQR()
d.add_data(test_nested_segwit_address)

assert d.get_address() == test_nested_segwit_address
assert d.get_address_type() == (SettingsConstants.NESTED_SEGWIT, SettingsConstants.TESTNET)
# Test data for bitcoin address decoding. All generated from test key: ["abandon"] * 11 + ["about"]
legacy_address_mainnet = "1LqBGSKuX5yYUonjxT5qGfpUsXKYYWeabA"
legacy_address_testnet = "mkpZhYtJu2r87Js3pDiWJDmPte2NRZ8bJV"

nested_segwit_address_mainnet = "37VucYSaXLCAsxYyAPfbSi9eh4iEcbShgf"
nested_segwit_address_testnet = "2Mww8dCYPUpKHofjgcXcBCEGmniw9CoaiD2"

native_segwit_address_mainnet = "bc1qcr8te4kr609gcawutmrza0j4xv80jy8z306fyu"
native_segwit_address_testnet = "tb1q6rz28mcfaxtmd6v789l9rrlrusdprr9pqcpvkl"
native_segwit_address_regtest = "bcrt1q6rz28mcfaxtmd6v789l9rrlrusdprr9pz3cppk"

taproot_address_mainnet = "bc1p5cyxnuxmeuwuvkwfem96lqzszd02n6xdcjrs20cac6yqjjwudpxqkedrcr"
taproot_address_testnet = "tb1p8wpt9v4frpf3tkn0srd97pksgsxc5hs52lafxwru9kgeephvs7rqlqt9zj"
taproot_address_regtest = "bcrt1p8wpt9v4frpf3tkn0srd97pksgsxc5hs52lafxwru9kgeephvs7rqjeprhg"



def test_bitcoin_address():
"""
Decoder should parse various types of valid bitcoin addresses with or without the
"bitcoin:" prefix and optional query params.
"""
def decode(address, expected_script_type, expected_network=SettingsConstants.MAINNET):
for data in [address, "bitcoin:" + address, "bitcoin:" + address + "?amount=12000"]:
d = DecodeQR()
d.add_data(data)
assert d.get_address() == address
assert d.get_address_type() == (expected_script_type, expected_network)

decode(legacy_address_mainnet, SettingsConstants.LEGACY_P2PKH)
decode(legacy_address_testnet, SettingsConstants.LEGACY_P2PKH, SettingsConstants.TESTNET)
decode(nested_segwit_address_mainnet, SettingsConstants.NESTED_SEGWIT)
decode(nested_segwit_address_testnet, SettingsConstants.NESTED_SEGWIT, SettingsConstants.TESTNET)
decode(native_segwit_address_mainnet, SettingsConstants.NATIVE_SEGWIT)
decode(native_segwit_address_testnet, SettingsConstants.NATIVE_SEGWIT, SettingsConstants.TESTNET)
decode(native_segwit_address_regtest, SettingsConstants.NATIVE_SEGWIT, SettingsConstants.REGTEST)
decode(taproot_address_mainnet, SettingsConstants.TAPROOT)
decode(taproot_address_testnet, SettingsConstants.TAPROOT, SettingsConstants.TESTNET)
decode(taproot_address_regtest, SettingsConstants.TAPROOT, SettingsConstants.REGTEST)



def test_invalid_bitcoin_address():
"""
Decoder should fail to parse invalid address data.
* Test incorrect "bitcoin:" prefix.
* Test invalid addresses.
* Test valid addresses w/additional prefixes (to ensure regexp is not finding
mid-string matches) which make the data invalid.
"""
bad_inputs = [
# wrong separator char
"bitcoin=bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq",

# Unrecognized addr prefix
"bitcoin:bcfakehrp1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq",

# valid addr w/garbage addr prefix
"abcbc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq",

# valid "bitcoin:" prefix w/garbage addr prefix
"bitcoin:abcbc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq",

# typo in "bitcoin:" prefix
"bitcon:bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq",
]

d = DecodeQR()
d.add_data(main_bech32_address2)

assert d.get_address() == "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq"
assert d.get_address_type() == (SettingsConstants.NATIVE_SEGWIT, SettingsConstants.MAINNET)
for bad_input in bad_inputs:
d = DecodeQR()
status = d.add_data(bad_input)
assert status == DecodeQRStatus.INVALID



def test_bitcoin_address_ignores_case_where_allowed():
"""
Decoder should ignore case in QR data prefix and in the address itself (for the
address types where case is ignored).
"""
def decode(address, expected_script_type, is_case_sensitive, expected_network=SettingsConstants.MAINNET):
addr_variations = [address]
if not is_case_sensitive:
# Test as-is and all uppercase
addr_variations.append(address.upper())

for addr_variation in addr_variations:
# First add prefix capitalizations
variations_1 = ["bitcoin:" + addr_variation, "BITCOIN:" + addr_variation]

# Now add query params
variations_2 = [v + "?amount=12000" for v in variations_1]
variations_3 = [v + "?AMOUNT=12000" for v in variations_1]
for data in [addr_variation] + variations_1 + variations_2 + variations_3:
d = DecodeQR()
d.add_data(data)
assert d.get_address_type() == (expected_script_type, expected_network)

if not is_case_sensitive:
assert d.get_address() == addr_variation.lower()
else:
assert d.get_address() == addr_variation

# Case sensitive address types
decode(legacy_address_mainnet, SettingsConstants.LEGACY_P2PKH, is_case_sensitive=True)
decode(legacy_address_testnet, SettingsConstants.LEGACY_P2PKH, is_case_sensitive=True, expected_network=SettingsConstants.TESTNET)
decode(nested_segwit_address_mainnet, SettingsConstants.NESTED_SEGWIT, is_case_sensitive=True)
decode(nested_segwit_address_testnet, SettingsConstants.NESTED_SEGWIT, is_case_sensitive=True, expected_network=SettingsConstants.TESTNET)

# Case insensitive address types
decode(native_segwit_address_mainnet, SettingsConstants.NATIVE_SEGWIT, is_case_sensitive=False)
decode(native_segwit_address_testnet, SettingsConstants.NATIVE_SEGWIT, is_case_sensitive=False, expected_network=SettingsConstants.TESTNET)
decode(native_segwit_address_regtest, SettingsConstants.NATIVE_SEGWIT, is_case_sensitive=False, expected_network=SettingsConstants.REGTEST)
decode(taproot_address_mainnet, SettingsConstants.TAPROOT, is_case_sensitive=False)
decode(taproot_address_testnet, SettingsConstants.TAPROOT, is_case_sensitive=False, expected_network=SettingsConstants.TESTNET)
decode(taproot_address_regtest, SettingsConstants.TAPROOT, is_case_sensitive=False, expected_network=SettingsConstants.REGTEST)



Expand Down
Loading