Skip to content

Latest commit

 

History

History
382 lines (282 loc) · 8.96 KB

README.md

File metadata and controls

382 lines (282 loc) · 8.96 KB

ClickHouse UDF written in Rust

Collection of some useful UDFs for ClickHouse written in Rust.

Compile into binary

$ cargo build --release

$ ls -lhp target/release | grep -v '/\|\.d'
-rwxr-xr-x    1 duet  staff   434K Feb 24 21:26 read-wkt-linestring
-rwxr-xr-x    1 duet  staff   434K Feb 24 21:26 vin-cleaner
-rwxr-xr-x    1 duet  staff   434K Feb 24 21:26 vin-cleaner-chunk-header
-rwxr-xr-x    1 duet  staff   434K Feb 24 21:26 vin-manuf
-rwxr-xr-x    1 duet  staff   434K Feb 24 21:26 vin-manuf-chunk-header
-rwxr-xr-x    1 duet  staff   434K Feb 24 21:26 vin-year
-rwxr-xr-x    1 duet  staff   434K Feb 24 21:26 vin-year-chunk-header
-rwxr-xr-x    1 duet  staff   434K Feb 24 21:26 extract-url
-rwxr-xr-x    1 duet  staff   434K Feb 24 21:26 has-url
-rwxr-xr-x    1 duet  staff   434K Feb 24 21:26 array-topk
  1. wkt
  2. vin
  3. url
  4. array

Usage

1. wkt

Put the wkt binaries into user_scripts folder (/var/lib/clickhouse/user_scripts/ with default path settings).
$ cd /var/lib/clickhouse/user_scripts/
$ wget https://github.com/duyet/clickhouse-udf-rs/releases/download/0.1.8/clickhouse_udf_wkt_v0.1.8_x86_64-unknown-linux-musl.tar.gz
$ tar zxvf clickhouse_udf_wkt_v0.1.8_x86_64-unknown-linux-musl.tar.gz

read-wkt-linestring
Creating UDF using XML configuration custom_udf_wkt_function.xml

define udf config file wkt_udf_function.xml (/etc/clickhouse-server/custom_udf_wkt_function.xml with default path settings, file name must be matched *_function.xml).

<functions>
  <!-- wkt -->
  <function>
      <name>readWktLineString</name>
      <type>executable_pool</type>
      <command>read-wkt-linestring</command>
      <format>TabSeparated</format>
      <argument>
          <type>String</type>
          <name>value</name>
      </argument>
      <return_type>String</return_type>
  </function>
  
</functions>
ClickHouse example queries
SELECT readWktLineString("LINESTRING (30 10, 10 30, 40 40)")

2. vin

Put the vin binaries into user_scripts folder (/var/lib/clickhouse/user_scripts/ with default path settings).
$ cd /var/lib/clickhouse/user_scripts/
$ wget https://github.com/duyet/clickhouse-udf-rs/releases/download/0.1.8/clickhouse_udf_vin_v0.1.8_x86_64-unknown-linux-musl.tar.gz
$ tar zxvf clickhouse_udf_vin_v0.1.8_x86_64-unknown-linux-musl.tar.gz

vin-cleaner
vin-cleaner-chunk-header
vin-manuf
vin-manuf-chunk-header
vin-year
vin-year-chunk-header
Creating UDF using XML configuration custom_udf_vin_function.xml

define udf config file vin_udf_function.xml (/etc/clickhouse-server/custom_udf_vin_function.xml with default path settings, file name must be matched *_function.xml).

<functions>
  <!-- vin -->
  <function>
      <name>vinCleaner</name>
      <type>executable_pool</type>
      <command>vin-cleaner</command>
      <format>TabSeparated</format>
      <argument>
          <type>String</type>
          <name>value</name>
      </argument>
      <return_type>String</return_type>
  </function>
  <function>
      <name>vinManuf</name>
      <type>executable_pool</type>
      <command>vin-manuf</command>
      <format>TabSeparated</format>
      <argument>
          <type>String</type>
          <name>value</name>
      </argument>
      <return_type>String</return_type>
  </function>
  <function>
      <name>vinYear</name>
      <type>executable_pool</type>
      <command>vin-year</command>
      <format>TabSeparated</format>
      <argument>
          <type>String</type>
          <name>value</name>
      </argument>
      <return_type>String</return_type>
  </function>
  
</functions>
UDF config with <send_chunk_header>1</send_chunk_header>
<functions>
    <!-- vin -->
    
    <function>
        <name>vinCleaner</name>
        <type>executable_pool</type>

        <command>vin-cleaner-chunk-header</command>
        <send_chunk_header>1</send_chunk_header>

        <format>TabSeparated</format>
        <argument>
            <type>String</type>
            <name>value</name>
        </argument>
        <return_type>String</return_type>
    </function>
    
    <function>
        <name>vinManuf</name>
        <type>executable_pool</type>

        <command>vin-manuf-chunk-header</command>
        <send_chunk_header>1</send_chunk_header>

        <format>TabSeparated</format>
        <argument>
            <type>String</type>
            <name>value</name>
        </argument>
        <return_type>String</return_type>
    </function>
    
    <function>
        <name>vinYear</name>
        <type>executable_pool</type>

        <command>vin-year-chunk-header</command>
        <send_chunk_header>1</send_chunk_header>

        <format>TabSeparated</format>
        <argument>
            <type>String</type>
            <name>value</name>
        </argument>
        <return_type>String</return_type>
    </function>
    </functions>
ClickHouse example queries
SELECT vinCleaner("1G1JC1249Y7150000")
SELECT vinCleaner("1G1JC1249Y7150000 ...")

SELECT vinManuf("1G1JC1249Y7150000")

SELECT vinYear("1G1JC1249Y7150000")

3. url

Put the url binaries into user_scripts folder (/var/lib/clickhouse/user_scripts/ with default path settings).
$ cd /var/lib/clickhouse/user_scripts/
$ wget https://github.com/duyet/clickhouse-udf-rs/releases/download/0.1.8/clickhouse_udf_url_v0.1.8_x86_64-unknown-linux-musl.tar.gz
$ tar zxvf clickhouse_udf_url_v0.1.8_x86_64-unknown-linux-musl.tar.gz

extract-url
has-url
Creating UDF using XML configuration custom_udf_url_function.xml

define udf config file url_udf_function.xml (/etc/clickhouse-server/custom_udf_url_function.xml with default path settings, file name must be matched *_function.xml).

<functions>
  <!-- url -->
  <function>
      <name>extractUrl</name>
      <type>executable_pool</type>
      <command>extract-url</command>
      <format>TabSeparated</format>
      <argument>
          <type>String</type>
          <name>value</name>
      </argument>
      <return_type>String</return_type>
  </function>
  <function>
      <name>hasUrl</name>
      <type>executable_pool</type>
      <command>has-url</command>
      <format>TabSeparated</format>
      <argument>
          <type>String</type>
          <name>value</name>
      </argument>
      <return_type>String</return_type>
  </function>
  
</functions>
ClickHouse example queries
SELECT extractUrl("extract from this https://duyet.net")

SELECT hasUrl("extract from this https://duyet.net")
SELECT hasUrl("no url here")

4. array

Put the array binaries into user_scripts folder (/var/lib/clickhouse/user_scripts/ with default path settings).
$ cd /var/lib/clickhouse/user_scripts/
$ wget https://github.com/duyet/clickhouse-udf-rs/releases/download/0.1.8/clickhouse_udf_array_v0.1.8_x86_64-unknown-linux-musl.tar.gz
$ tar zxvf clickhouse_udf_array_v0.1.8_x86_64-unknown-linux-musl.tar.gz

array-topk
Creating UDF using XML configuration custom_udf_array_function.xml

define udf config file array_udf_function.xml (/etc/clickhouse-server/custom_udf_array_function.xml with default path settings, file name must be matched *_function.xml).

<functions>
  <!-- array -->
  <function>
      <name>arrayTopK</name>
      <type>executable_pool</type>
      <command>array-topk</command>
      <format>TabSeparated</format>
      <argument>
          <type>String</type>
          <name>value</name>
      </argument>
      <return_type>String</return_type>
  </function>
  
</functions>
ClickHouse example queries
SELECT arrayTopK(3)([1, 1, 2, 2, 3, 4, 5])
SELECT arrayTopK(1)([2, 3, 4, 5])

Generate README

RELEASE_VERSION=0.1.8 cargo run --bin readme-generator . > README.md

License

MIT

Done