Skip to content

Commit

Permalink
Merge pull request #121 from govlt/calculate-checksums
Browse files Browse the repository at this point in the history
Calculate checksums for data sources
  • Loading branch information
vycius authored Jul 30, 2024
2 parents b5d42de + d4e5bec commit cef810e
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 19 deletions.
34 changes: 15 additions & 19 deletions .github/workflows/basemap-vector-data-source.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,27 +38,20 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v4
with:
sparse-checkout: .github

- name: Convert address information to GPKG
run: ogr2ogr -f GPKG addresses.gpkg "https://www.registrucentras.lt/aduomenys/?byla=adr_stat_lr.csv" -nln info
sparse-checkout: |
.github
scripts
# The complete geojson data with all municipalities is updated only once a year. However,
# when downloaded per municipality, it is updated every month. To ensure we have the latest data,
# this step pulls data for each municipality individually.
- name: Convert each municipality's geojson data to GPKG format
run: |
curl -sf "https://www.registrucentras.lt/aduomenys/?byla=adr_savivaldybes.csv" | csvcut -d "|" -c "SAV_KODAS" | tail -n +2 | while read -r code; do
echo "Converting https://www.registrucentras.lt/aduomenys/?byla=adr_gra_$code.json"
ogr2ogr -append -f GPKG addresses.gpkg "https://www.registrucentras.lt/aduomenys/?byla=adr_gra_$code.json" -nln points --config GDAL_HTTP_MAX_RETRY 10
done
- name: Generate houses GPKG in EPSG:4326
run: bash scripts/create-houses-geopackage.sh

- name: Create output directory
run: mkdir output

- name: Generate houses GPKG in EPSG:4326
run: ogr2ogr -f GPKG -t_srs EPSG:4326 -xyRes "0.0000001" -sql "SELECT points.fid, points.geom, points.AOB_KODAS, info.sav_kodas, points.gyv_kodas, points.gat_kodas, info.nr, info.pasto_kodas, info.korpuso_nr FROM points INNER JOIN info USING (AOB_KODAS) ORDER BY AOB_KODAS" output/houses-espg-4326.gpkg.zip addresses.gpkg -nln houses
- name: Copy files to output
run: |
cp houses-espg-4326.gpkg.zip output/houses-espg-4326.gpkg.zip
cp data-sources/data-source-checksums.txt output/data-source-checksums.txt
- name: Generate checksum
working-directory: output
Expand Down Expand Up @@ -100,15 +93,18 @@ jobs:
with:
sparse-checkout: .github

- name: Create output directory
run: mkdir output

- name: Download GRPK from GeoPortal
run: wget -nv https://www.geoportal.lt/download/opendata/GRPK/GRPK_Open_SHP.zip

- name: Calculate GRPK_Open_SHP.zip checksum
run: md5sum GRPK_Open_SHP.zip >> output/data-source-checksums.txt

- name: Unzip GRPK data source
run: unzip GRPK_Open_SHP.zip -x "GRPK_Open_SHP/INFO_/*" && rm GRPK_Open_SHP.zip

- name: Create output directory
run: mkdir output

- name: Transform to EPSG:4326
run: ogr2ogr -f "ESRI Shapefile" -lco ENCODING=UTF-8 -t_srs EPSG:4326 output/grpk-espg-4326.shp.zip GRPK_Open_SHP

Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,5 @@ bin/

# Ignore Gradle build output directory
build

data-sources
57 changes: 57 additions & 0 deletions scripts/create-houses-geopackage.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/bin/bash
#
# Builds the houses GeoPackage (houses-espg-4326.gpkg.zip) from the address
# data published at www.registrucentras.lt, and records an MD5 checksum for
# every downloaded source file in data-sources/data-source-checksums.txt.
#
# External tools used: curl, csvcut, ogr2ogr, and md5sum (md5 on macOS).

# Strict mode:
#   -e           abort on the first unhandled command failure
#   -u           treat expansion of an unset variable as an error
#   -o pipefail  a pipeline fails when ANY stage fails, so a broken csvcut
#                cannot silently produce an empty municipality list
set -euo pipefail

# Print the MD5 checksum line of a file using the platform's own tool.
# Arguments: $1 - path of the file to hash
# Outputs:   whatever the selected tool prints for that file (stdout)
# Returns:   the exit status of the selected tool
calculate_md5() {
  local target="$1"

  case "$OSTYPE" in
    darwin*)
      # macOS
      md5 -r "$target"
      ;;
    *)
      # Linux and other Unix-like systems
      md5sum "$target"
      ;;
  esac
}

#######################################
# Download one data source into data-sources/ and record its MD5 checksum.
# Arguments:
#   $1 - file name to store the download under, inside data-sources/
#   $2 - URL to fetch
# Outputs:
#   Appends the calculate_md5 output for the downloaded file to
#   data-sources/data-source-checksums.txt.
#   On a failed download, writes a diagnostic (including the URL) to stderr.
# Returns:
#   1 if the download fails; otherwise the status of the checksum step.
#######################################
download_data_source_and_md5() {
  local filename="$1"
  local url="$2"
  local target="data-sources/$filename"

  if ! curl -f -L --max-redirs 5 --retry 3 -o "$target" "$url"; then
    # Diagnostics belong on stderr; naming the URL tells the reader which of
    # the many per-municipality downloads broke.
    echo "Download failed: $url" >&2
    # Do not leave a truncated file behind that could be mistaken for data.
    rm -f -- "$target"
    return 1
  fi

  # Calculate the MD5 checksum of the downloaded file
  calculate_md5 "$target" >> data-sources/data-source-checksums.txt
}

# ---------------------------------------------------------------------------
# Main flow:
#   1. reset the working directory and any previous result,
#   2. load the address attribute CSV into the "info" layer,
#   3. append each municipality's address points to the "points" layer,
#   4. join both layers into the "houses" layer, reprojected to EPSG:4326.
# ---------------------------------------------------------------------------

work_dir="data-sources"
staging_gpkg="$work_dir/addresses.gpkg"
result_file="houses-espg-4326.gpkg.zip"
registry_url="https://www.registrucentras.lt/aduomenys/?byla="

# SQL joining point geometries with their address attributes on AOB_KODAS.
houses_sql="SELECT points.fid, points.geom, points.AOB_KODAS, info.sav_kodas,"
houses_sql+=" points.gyv_kodas, points.gat_kodas, info.nr, info.pasto_kodas, info.korpuso_nr"
houses_sql+=" FROM points INNER JOIN info USING (AOB_KODAS) ORDER BY AOB_KODAS"

echo "Starting data processing"

# Start from a clean slate so stale downloads and checksums never leak into
# this run.
rm -rf "$result_file" "$work_dir"
mkdir -p "$work_dir"

echo "Importing addresses data into GeoPackage"

download_data_source_and_md5 adr_stat_lr.csv "${registry_url}adr_stat_lr.csv"
ogr2ogr -f GPKG "$staging_gpkg" "$work_dir/adr_stat_lr.csv" -nln info

echo "Importing address points for each municipality"

download_data_source_and_md5 adr_savivaldybes.csv "${registry_url}adr_savivaldybes.csv"

# The municipality list is pipe-delimited; keep only the SAV_KODAS column and
# drop its header row, then fetch and append one municipality at a time.
csvcut "$work_dir/adr_savivaldybes.csv" -d "|" -c "SAV_KODAS" \
  | tail -n +2 \
  | while read -r municipality_code; do
      points_url="${registry_url}adr_gra_${municipality_code}.json"
      points_file="addresses-${municipality_code}.json"

      echo "Converting $points_url"

      download_data_source_and_md5 "$points_file" "$points_url"

      ogr2ogr -append -f GPKG "$staging_gpkg" "$work_dir/$points_file" -nln points
    done

echo "Creating houses GeoPackage"

ogr2ogr -f GPKG -t_srs EPSG:4326 -xyRes "0.0000001" -sql "$houses_sql" "$result_file" "$staging_gpkg" -nln houses

echo "GeoPackage database created successfully"

0 comments on commit cef810e

Please sign in to comment.