diff --git a/src/seedsigner/models/decode_qr.py b/src/seedsigner/models/decode_qr.py index e98a2c56..1135a140 100644 --- a/src/seedsigner/models/decode_qr.py +++ b/src/seedsigner/models/decode_qr.py @@ -507,8 +507,7 @@ def base43_decode(s): def is_bitcoin_address(s): if re.search(r'^bitcoin\:.*', s, re.IGNORECASE): return True - elif re.search(r'^((bc1|tb1|bcr|[123]|[mn])[a-zA-HJ-NP-Z0-9]{25,62})$', s): - # TODO: Handle regtest bcrt? + elif re.search(r'^((bc1|tb1|bcr|[123]|[mn])[a-zA-HJ-NP-Z0-9]{25,62})$', s, re.IGNORECASE): return True else: return False @@ -940,61 +939,72 @@ def __init__(self): def add(self, segment, qr_type=QRType.BITCOIN_ADDRESS): - r = re.search(r'((bc1q|tb1q|bcrt1q|bc1p|tb1p|bcrt1p|[123]|[mn])[a-zA-HJ-NP-Z0-9]{25,64})', segment) - if r != None: - self.address = r.group(1) - - if re.search(r'^((bc1q|tb1q|bcrt1q|bc1p|tb1p|bcrt1p|[123]|[mn])[a-zA-HJ-NP-Z0-9]{25,64})$', self.address) != None: - self.complete = True - self.collected_segments = 1 - - # get address type - r = re.search(r'^((bc1q|tb1q|bcrt1q|bc1p|tb1p|bcrt1p|[123]|[mn])[a-zA-HJ-NP-Z0-9]{25,64})$', self.address) - if r != None: - r = r.group(2) - - if r == "1": - # Legacy P2PKH. mainnet - self.address_type = (SettingsConstants.LEGACY_P2PKH, SettingsConstants.MAINNET) + """ + Input may be prefixed with "bitcoin:" but will be ignored. + + RegEx searches for a recognizable bitcoin address. + * The `^` ensures that the specified address prefixes can only match at + the beginning of the address. + + Result will yield the following match groups: + * group 1: complete address + * group 2: address prefix + """ + address_match = re.search(r'^((bc1q|tb1q|bcrt1q|bc1p|tb1p|bcrt1p|[123]|[mn])[a-zA-HJ-NP-Z0-9]{25,64})', segment.split(":")[-1], re.IGNORECASE) + if address_match != None: + self.address = address_match.group(1) + self.complete = True + self.collected_segments = 1 + + # Have to handle wallets that uppercase bech32 addresses. + # Note that it's safe to lowercase the prefix for ALL addr formats. + addr_prefix = address_match.group(2).lower() + + if addr_prefix == "1": + # Legacy P2PKH. mainnet + self.address_type = (SettingsConstants.LEGACY_P2PKH, SettingsConstants.MAINNET) - elif r == "m" or r == "n": - self.address_type = (SettingsConstants.LEGACY_P2PKH, SettingsConstants.TESTNET) + elif addr_prefix in ["m", "n"]: + self.address_type = (SettingsConstants.LEGACY_P2PKH, SettingsConstants.TESTNET) - elif r == "3": - # Nested segwit single sig (p2sh-p2wpkh), nested segwit multisig (p2sh-p2wsh), or legacy multisig (p2sh); mainnet - # TODO: Would be more correct to use a P2SH constant - self.address_type = (SettingsConstants.NESTED_SEGWIT, SettingsConstants.MAINNET) + elif addr_prefix == "3": + # Nested segwit single sig (p2sh-p2wpkh), nested segwit multisig (p2sh-p2wsh), or legacy multisig (p2sh); mainnet + # TODO: Would be more correct to use a P2SH constant + self.address_type = (SettingsConstants.NESTED_SEGWIT, SettingsConstants.MAINNET) - elif r == "2": - # Nested segwit single sig (p2sh-p2wpkh), nested segwit multisig (p2sh-p2wsh), or legacy multisig (p2sh); testnet / regtest - self.address_type = (SettingsConstants.NESTED_SEGWIT, SettingsConstants.TESTNET) + elif addr_prefix == "2": + # Nested segwit single sig (p2sh-p2wpkh), nested segwit multisig (p2sh-p2wsh), or legacy multisig (p2sh); testnet / regtest + self.address_type = (SettingsConstants.NESTED_SEGWIT, SettingsConstants.TESTNET) - elif r == "bc1q": - # Native Segwit (single sig or multisig), mainnet - self.address_type = (SettingsConstants.NATIVE_SEGWIT, SettingsConstants.MAINNET) + elif addr_prefix == "bc1q": + # Native Segwit (single sig or multisig), mainnet + self.address_type = (SettingsConstants.NATIVE_SEGWIT, SettingsConstants.MAINNET) - elif r == "tb1q": - # Native Segwit (single sig or multisig), testnet - self.address_type = (SettingsConstants.NATIVE_SEGWIT, SettingsConstants.TESTNET) + elif addr_prefix == "tb1q": + # Native Segwit (single sig or multisig), testnet + self.address_type = (SettingsConstants.NATIVE_SEGWIT, SettingsConstants.TESTNET) - elif r == "bcrt1q": - # Native Segwit (single sig or multisig), regtest - self.address_type = (SettingsConstants.NATIVE_SEGWIT, SettingsConstants.REGTEST) + elif addr_prefix == "bcrt1q": + # Native Segwit (single sig or multisig), regtest + self.address_type = (SettingsConstants.NATIVE_SEGWIT, SettingsConstants.REGTEST) - elif r == "bc1p": - # Native Segwit (single sig or multisig), mainnet - self.address_type = (SettingsConstants.TAPROOT, SettingsConstants.MAINNET) + elif addr_prefix == "bc1p": + self.address_type = (SettingsConstants.TAPROOT, SettingsConstants.MAINNET) - elif r == "tb1p": - # Native Segwit (single sig or multisig), testnet - self.address_type = (SettingsConstants.TAPROOT, SettingsConstants.TESTNET) + elif addr_prefix == "tb1p": + self.address_type = (SettingsConstants.TAPROOT, SettingsConstants.TESTNET) - elif r == "bcrt1p": - # Native Segwit (single sig or multisig), regtest - self.address_type = (SettingsConstants.TAPROOT, SettingsConstants.REGTEST) - - return DecodeQRStatus.COMPLETE + elif addr_prefix == "bcrt1p": + self.address_type = (SettingsConstants.TAPROOT, SettingsConstants.REGTEST) + # Note: there is no final "else" here because the regex won't return any other matches. + + # If the addr type is case-insensitive, ensure we return it lowercase + if self.address_type[0] in [SettingsConstants.NATIVE_SEGWIT, SettingsConstants.TAPROOT]: + self.address = self.address.lower() + + return DecodeQRStatus.COMPLETE + logger.debug(f"Invalid address: {segment}") return DecodeQRStatus.INVALID diff --git a/tests/test_decodepsbtqr.py b/tests/test_decodepsbtqr.py index 9b7d6981..7672d62b 100644 --- a/tests/test_decodepsbtqr.py +++ b/tests/test_decodepsbtqr.py @@ -281,69 +281,121 @@ def test_short_4_letter_mnemonic_qr(): assert d.get_seed_phrase() == ["height", "demise", "useless", "trap", "grow", "lion", "found", "off", "key", "clown", "transfer", "enroll"] -def test_bitcoin_address(): - bad1 = "loremipsum" - bad2 = "0xde0b295669a9fd93d5f28d9ec85e40f4cb697bae" - bad3 = "121802020768124106400009195602431595117715840445" - - legacy_address1 = "1KFHE7w8BhaENAswwryaoccDb6qcT6DbYY" - legacy_address2 = "16ftSEQ4ctQFDtVZiUBusQUjRrGhM3JYwe" - - main_bech32_address = "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq" - test_bech32_address = "tb1qkurj377gtlmu0j5flcykcsh2xagexh9h3jk06a" - - main_nested_segwit_address = "3Nu78Cqcf6hsD4sUBAN9nP13tYiHU9QPFX" - test_nested_segwit_address = "2N6JbrvPMMwbBhu2KxqXyyHUQz3XKspvyfm" - - main_bech32_address2 = "bitcoin:bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq?amount=12000" - main_bech32_address3 = "BITCOIN:bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq?junk" - - d = DecodeQR() - d.add_data(bad1) - - assert d.qr_type == QRType.INVALID - - d = DecodeQR() - d.add_data(legacy_address1) - - assert d.get_address() == legacy_address1 - assert d.get_address_type() == (SettingsConstants.LEGACY_P2PKH, SettingsConstants.MAINNET) - - d = DecodeQR() - d.add_data(legacy_address2) - - assert d.get_address() == legacy_address2 - assert d.get_address_type() == (SettingsConstants.LEGACY_P2PKH, SettingsConstants.MAINNET) - - d = DecodeQR() - d.add_data(main_bech32_address) - - assert d.get_address() == main_bech32_address - assert d.get_address_type() == (SettingsConstants.NATIVE_SEGWIT, SettingsConstants.MAINNET) - - d = DecodeQR() - d.add_data(test_bech32_address) - - assert d.get_address() == test_bech32_address - assert d.get_address_type() == (SettingsConstants.NATIVE_SEGWIT, SettingsConstants.TESTNET) - - d = DecodeQR() - d.add_data(main_nested_segwit_address) - - assert d.get_address() == main_nested_segwit_address - assert d.get_address_type() == (SettingsConstants.NESTED_SEGWIT, SettingsConstants.MAINNET) - - d = DecodeQR() - d.add_data(test_nested_segwit_address) - - assert d.get_address() == test_nested_segwit_address - assert d.get_address_type() == (SettingsConstants.NESTED_SEGWIT, SettingsConstants.TESTNET) +# Test data for bitcoin address decoding. All generated from test key: ["abandon"] * 11 + ["about"] +legacy_address_mainnet = "1LqBGSKuX5yYUonjxT5qGfpUsXKYYWeabA" +legacy_address_testnet = "mkpZhYtJu2r87Js3pDiWJDmPte2NRZ8bJV" + +nested_segwit_address_mainnet = "37VucYSaXLCAsxYyAPfbSi9eh4iEcbShgf" +nested_segwit_address_testnet = "2Mww8dCYPUpKHofjgcXcBCEGmniw9CoaiD2" + +native_segwit_address_mainnet = "bc1qcr8te4kr609gcawutmrza0j4xv80jy8z306fyu" +native_segwit_address_testnet = "tb1q6rz28mcfaxtmd6v789l9rrlrusdprr9pqcpvkl" +native_segwit_address_regtest = "bcrt1q6rz28mcfaxtmd6v789l9rrlrusdprr9pz3cppk" + +taproot_address_mainnet = "bc1p5cyxnuxmeuwuvkwfem96lqzszd02n6xdcjrs20cac6yqjjwudpxqkedrcr" +taproot_address_testnet = "tb1p8wpt9v4frpf3tkn0srd97pksgsxc5hs52lafxwru9kgeephvs7rqlqt9zj" +taproot_address_regtest = "bcrt1p8wpt9v4frpf3tkn0srd97pksgsxc5hs52lafxwru9kgeephvs7rqjeprhg" + + + +def test_bitcoin_address(): + """ + Decoder should parse various types of valid bitcoin addresses with or without the + "bitcoin:" prefix and optional query params. + """ + def decode(address, expected_script_type, expected_network=SettingsConstants.MAINNET): + for data in [address, "bitcoin:" + address, "bitcoin:" + address + "?amount=12000"]: + d = DecodeQR() + d.add_data(data) + assert d.get_address() == address + assert d.get_address_type() == (expected_script_type, expected_network) + + decode(legacy_address_mainnet, SettingsConstants.LEGACY_P2PKH) + decode(legacy_address_testnet, SettingsConstants.LEGACY_P2PKH, SettingsConstants.TESTNET) + decode(nested_segwit_address_mainnet, SettingsConstants.NESTED_SEGWIT) + decode(nested_segwit_address_testnet, SettingsConstants.NESTED_SEGWIT, SettingsConstants.TESTNET) + decode(native_segwit_address_mainnet, SettingsConstants.NATIVE_SEGWIT) + decode(native_segwit_address_testnet, SettingsConstants.NATIVE_SEGWIT, SettingsConstants.TESTNET) + decode(native_segwit_address_regtest, SettingsConstants.NATIVE_SEGWIT, SettingsConstants.REGTEST) + decode(taproot_address_mainnet, SettingsConstants.TAPROOT) + decode(taproot_address_testnet, SettingsConstants.TAPROOT, SettingsConstants.TESTNET) + decode(taproot_address_regtest, SettingsConstants.TAPROOT, SettingsConstants.REGTEST) + + + +def test_invalid_bitcoin_address(): + """ + Decoder should fail to parse invalid address data. + * Test incorrect "bitcoin:" prefix. + * Test invalid addresses. + * Test valid addresses w/additional prefixes (to ensure regexp is not finding + mid-string matches) which make the data invalid. + """ + bad_inputs = [ + # wrong separator char + "bitcoin=bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq", + + # Unrecognized addr prefix + "bitcoin:bcfakehrp1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq", + + # valid addr w/garbage addr prefix + "abcbc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq", + + # valid "bitcoin:" prefix w/garbage addr prefix + "bitcoin:abcbc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq", + + # typo in "bitcoin:" prefix + "bitcon:bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq", + ] - d = DecodeQR() - d.add_data(main_bech32_address2) - - assert d.get_address() == "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq" - assert d.get_address_type() == (SettingsConstants.NATIVE_SEGWIT, SettingsConstants.MAINNET) + for bad_input in bad_inputs: + d = DecodeQR() + status = d.add_data(bad_input) + assert status == DecodeQRStatus.INVALID + + + +def test_bitcoin_address_ignores_case_where_allowed(): + """ + Decoder should ignore case in QR data prefix and in the address itself (for the + address types where case is ignored). + """ + def decode(address, expected_script_type, is_case_sensitive, expected_network=SettingsConstants.MAINNET): + addr_variations = [address] + if not is_case_sensitive: + # Test as-is and all uppercase + addr_variations.append(address.upper()) + + for addr_variation in addr_variations: + # First add prefix capitalizations + variations_1 = ["bitcoin:" + addr_variation, "BITCOIN:" + addr_variation] + + # Now add query params + variations_2 = [v + "?amount=12000" for v in variations_1] + variations_3 = [v + "?AMOUNT=12000" for v in variations_1] + for data in [addr_variation] + variations_1 + variations_2 + variations_3: + d = DecodeQR() + d.add_data(data) + assert d.get_address_type() == (expected_script_type, expected_network) + + if not is_case_sensitive: + assert d.get_address() == addr_variation.lower() + else: + assert d.get_address() == addr_variation + + # Case sensitive address types + decode(legacy_address_mainnet, SettingsConstants.LEGACY_P2PKH, is_case_sensitive=True) + decode(legacy_address_testnet, SettingsConstants.LEGACY_P2PKH, is_case_sensitive=True, expected_network=SettingsConstants.TESTNET) + decode(nested_segwit_address_mainnet, SettingsConstants.NESTED_SEGWIT, is_case_sensitive=True) + decode(nested_segwit_address_testnet, SettingsConstants.NESTED_SEGWIT, is_case_sensitive=True, expected_network=SettingsConstants.TESTNET) + + # Case insensitive address types + decode(native_segwit_address_mainnet, SettingsConstants.NATIVE_SEGWIT, is_case_sensitive=False) + decode(native_segwit_address_testnet, SettingsConstants.NATIVE_SEGWIT, is_case_sensitive=False, expected_network=SettingsConstants.TESTNET) + decode(native_segwit_address_regtest, SettingsConstants.NATIVE_SEGWIT, is_case_sensitive=False, expected_network=SettingsConstants.REGTEST) + decode(taproot_address_mainnet, SettingsConstants.TAPROOT, is_case_sensitive=False) + decode(taproot_address_testnet, SettingsConstants.TAPROOT, is_case_sensitive=False, expected_network=SettingsConstants.TESTNET) + decode(taproot_address_regtest, SettingsConstants.TAPROOT, is_case_sensitive=False, expected_network=SettingsConstants.REGTEST)