forked from moj-analytical-services/splink
-
Notifications
You must be signed in to change notification settings - Fork 0
/
mkdocs.yml
277 lines (276 loc) · 12.5 KB
/
mkdocs.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
site_name: Splink
site_url: https://moj-analytical-services.github.io/splink/
use_directory_urls: false
repo_url: https://github.com/moj-analytical-services/splink
edit_uri: edit/master/docs/
theme:
icon:
repo: fontawesome/brands/github
name: "material"
features:
- content.code.annotate
- content.code.copy
- content.tabs.link
- content.tooltips
- header.autohide
- search.highlight
- search.share
- search.suggest
- navigation.indexes
- navigation.footer
- content.action.edit
- navigation.tabs
- navigation.tabs.sticky
- navigation.top
- toc.follow
logo: "img/favicon.ico"
favicon: "img/favicon.ico"
palette:
- scheme: default
primary: teal
accent: purple
toggle:
icon: material/toggle-switch
name: Switch to dark mode
- scheme: slate
primary: teal
accent: purple
toggle:
icon: material/toggle-switch-off-outline
name: Switch to light mode
custom_dir: docs/overrides
plugins:
- blog:
post_date_format: long
archive: false
- search
- mknotebooks:
tag_remove_configs:
remove_input_tags:
- hide_input
remove_all_outputs_tags:
- hide_output
- tags
- mkdocstrings:
default_handler: python
handlers:
python:
rendering:
show_source: false
- charts
- mkdocs-video:
is_video: True
video_autoplay: false
- rss:
match_path: blog/posts/.*
date_from_meta:
as_creation: date
hooks:
- docs/hooks/__init__.py
markdown_extensions:
- abbr
- attr_list
- meta
- admonition
- md_in_html
- pymdownx.details
- pymdownx.arithmatex:
generic: true
- pymdownx.superfences:
custom_fences:
- name: vegalite
class: vegalite
format: !!python/name:mkdocs_charts_plugin.fences.fence_vegalite
- name: mermaid
class: mermaid
format: !!python/name:pymdownx.superfences.fence_code_format
- pymdownx.snippets:
auto_append:
- includes/abbreviations.md
- pymdownx.tabbed:
alternate_style: true
- toc:
permalink: True
toc_depth: 3
- attr_list
- pymdownx.emoji:
emoji_index: !!python/name:material.extensions.emoji.twemoji
emoji_generator: !!python/name:material.extensions.emoji.to_svg
- footnotes
- neoteroi.cards
nav:
- Getting Started: "getting_started.md"
- Tutorial:
- Introduction: "demos/tutorials/00_Tutorial_Introduction.ipynb"
- 1. Data prep prerequisites: "demos/tutorials/01_Prerequisites.ipynb"
- 2. Exploratory analysis: "demos/tutorials/02_Exploratory_analysis.ipynb"
- 3. Blocking: "demos/tutorials/03_Blocking.ipynb"
- 4. Estimating model parameters: "demos/tutorials/04_Estimating_model_parameters.ipynb"
- 5. Predicting results: "demos/tutorials/05_Predicting_results.ipynb"
- 6. Visualising predictions: "demos/tutorials/06_Visualising_predictions.ipynb"
- 7. Evaluation: "demos/tutorials/07_Evaluation.ipynb"
- Examples:
- Introduction: "demos/examples/examples_index.md"
- DuckDB:
- Deduplicate 50k rows historical persons: "demos/examples/duckdb/deduplicate_50k_synthetic.ipynb"
- Linking financial transactions: "demos/examples/duckdb/transactions.ipynb"
- Linking two tables of persons: "demos/examples/duckdb/link_only.ipynb"
- Real time record linkage: "demos/examples/duckdb/real_time_record_linkage.ipynb"
- Evaluation from ground truth column: "demos/examples/duckdb/accuracy_analysis_from_labels_column.ipynb"
- Estimating m probabilities from labels: "demos/examples/duckdb/pairwise_labels.ipynb"
- Quick and dirty persons model: "demos/examples/duckdb/quick_and_dirty_persons.ipynb"
- Deterministic dedupe: "demos/examples/duckdb/deterministic_dedupe.ipynb"
- Febrl3 Dedupe: "demos/examples/duckdb/febrl3.ipynb"
- Febrl4 link-only: "demos/examples/duckdb/febrl4.ipynb"
- Cookbook: "demos/examples/duckdb/cookbook.ipynb"
- PySpark:
- Deduplication using Pyspark: "demos/examples/spark/deduplicate_1k_synthetic.ipynb"
- Athena:
- Deduplicate 50k rows historical persons: "demos/examples/athena/deduplicate_50k_synthetic.ipynb"
- SQLite:
- Deduplicate 50k rows historical persons: "demos/examples/sqlite/deduplicate_50k_synthetic.ipynb"
- API Docs:
- Introduction: "api_docs/api_docs_index.md"
- Linker:
- Training: "api_docs/training.md"
- Visualisations: "api_docs/visualisations.md"
- Inference: "api_docs/inference.md"
- Clustering: "api_docs/clustering.md"
- Evaluation: "api_docs/evaluation.md"
- Table Management: "api_docs/table_management.md"
- Miscellaneous functions: "api_docs/misc.md"
- Comparisons Library:
- Comparison Library: "api_docs/comparison_library.md"
- Comparison Level Library: "api_docs/comparison_level_library.md"
- Other:
- Exploratory: "api_docs/exploratory.md"
- Blocking rule creator: "api_docs/blocking.md"
- Blocking analysis: "api_docs/blocking_analysis.md"
- SplinkDataFrame: "api_docs/splink_dataframe.md"
- EM Training Session API: "api_docs/em_training_session.md"
- SplinkDatasets: "api_docs/datasets.md"
- Settings Dict: "api_docs/settings_dict_guide.md"
- User Guide:
- Introduction: "topic_guides/topic_guides_index.md"
- Record Linkage Theory:
- Why do we need record linkage?: "topic_guides/theory/record_linkage.md"
- Probabilistic vs Deterministic linkage: "topic_guides/theory/probabilistic_vs_deterministic.md"
- The Fellegi-Sunter Model: "topic_guides/theory/fellegi_sunter.md"
- Linked Data as Graphs: "topic_guides/theory/linked_data_as_graphs.md"
- Linkage Models in Splink:
- Defining Splink models: "topic_guides/splink_fundamentals/settings.md"
- Retrieving and querying Splink results: "topic_guides/splink_fundamentals/querying_splink_results.md"
- Link type - linking vs deduping: "topic_guides/splink_fundamentals/link_type.md"
- Splink's SQL backends - Spark, DuckDB etc:
- Backends overview: "topic_guides/splink_fundamentals/backends/backends.md"
- PostgreSQL: "topic_guides/splink_fundamentals/backends/postgres.md"
- Data Preparation:
- Feature Engineering: "topic_guides/data_preparation/feature_engineering.md"
- Blocking:
- What are Blocking Rules?: "topic_guides/blocking/blocking_rules.md"
- Computational Performance: "topic_guides/blocking/performance.md"
- Model Training Blocking Rules: "topic_guides/blocking/model_training.md"
- Comparing Records:
- Comparisons and comparison levels: "topic_guides/comparisons/comparisons_and_comparison_levels.md"
- Defining and customising comparisons: "topic_guides/comparisons/customising_comparisons.ipynb"
- Out-of-the-box comparisons: "topic_guides/comparisons/out_of_the_box_comparisons.ipynb"
- Term frequency adjustments: "topic_guides/comparisons/term-frequency.md"
- Comparing strings:
- String comparators: "topic_guides/comparisons/comparators.md"
- Choosing string comparators: "topic_guides/comparisons/choosing_comparators.ipynb"
- Phonetic algorithms: "topic_guides/comparisons/phonetic.md"
- Regular expressions: "topic_guides/comparisons/regular_expressions.ipynb"
- Evaluation:
- Overview: "topic_guides/evaluation/overview.md"
- Model: "topic_guides/evaluation/model.md"
- Edges (Links):
- Overview: "topic_guides/evaluation/edge_overview.md"
- Edge Metrics: "topic_guides/evaluation/edge_metrics.md"
- Clerical Labelling: "topic_guides/evaluation/labelling.md"
- Clusters:
- Overview: "topic_guides/evaluation/clusters/overview.md"
- Graph metrics: "topic_guides/evaluation/clusters/graph_metrics.md"
- How to compute graph metrics: "topic_guides/evaluation/clusters/how_to_compute_metrics.ipynb"
- Performance:
- Run times, performance and linking large data: "topic_guides/performance/drivers_of_performance.md"
- Spark Performance:
- Optimising Spark performance: "topic_guides/performance/optimising_spark.md"
- Salting blocking rules: "topic_guides/performance/salting.md"
- DuckDB Performance:
- Optimising DuckDB performance: "topic_guides/performance/optimising_duckdb.md"
- Charts Gallery:
- "charts/index.md"
- Exploratory Analysis:
- completeness chart: "charts/completeness_chart.ipynb"
- profile columns: "charts/profile_columns.ipynb"
- Blocking:
- cumulative num comparisons from blocking rules chart: "charts/cumulative_comparisons_to_be_scored_from_blocking_rules_chart.ipynb"
- Similarity analysis:
- Comparator score chart: "topic_guides/comparisons/choosing_comparators.html#comparing-string-similarity-and-distance-scores"
- Comparator score threshold chart: "topic_guides/comparisons/choosing_comparators.html#choosing-thresholds"
- Phonetic match chart: "topic_guides/comparisons/choosing_comparators.html#phonetic-matching"
- Model Training:
- comparison viewer dashboard: "charts/comparison_viewer_dashboard.ipynb"
- match weights chart: "charts/match_weights_chart.ipynb"
- m u parameters chart: "charts/m_u_parameters_chart.ipynb"
- parameter estimate comparisons chart: "charts/parameter_estimate_comparisons_chart.ipynb"
- tf adjustment chart: "charts/tf_adjustment_chart.ipynb"
- unlinkables chart: "charts/unlinkables_chart.ipynb"
- waterfall chart: "charts/waterfall_chart.ipynb"
- Clustering:
- cluster studio dashboard: "charts/cluster_studio_dashboard.ipynb"
- Model Evaluation:
- accuracy chart from labels table: "charts/accuracy_analysis_from_labels_table.ipynb"
- threshold selection tool: charts/threshold_selection_tool_from_labels_table.ipynb
- Contributing:
- "dev_guides/index.md"
- Contributing to Splink:
- Contributor Guide: "dev_guides/CONTRIBUTING.md"
- Development Quickstart: "dev_guides/changing_splink/development_quickstart.md"
- Building your local environment: "dev_guides/changing_splink/building_env_locally.md"
- Linting and Formatting: "dev_guides/changing_splink/lint_and_format.md"
- Testing: "dev_guides/changing_splink/testing.md"
- Contributing to Documentation: "dev_guides/changing_splink/contributing_to_docs.md"
- Managing Dependencies with Poetry: "dev_guides/changing_splink/managing_dependencies_with_poetry.md"
- Releasing a Package Version: "dev_guides/changing_splink/releases.md"
- Contributing to the Splink Blog: "dev_guides/changing_splink/blog_posts.md"
- How Splink works:
- Understanding and debugging Splink: "dev_guides/debug_modes.md"
- Transpilation using sqlglot: "dev_guides/transpilation.md"
- Performance and caching:
- Caching and pipelining: "dev_guides/caching.md"
- Spark caching: "dev_guides/spark_pipelining_and_caching.md"
- Charts:
- Understanding and editing charts: "dev_guides/charts/understanding_and_editing_charts.md"
- Building new charts: "dev_guides/charts/building_charts.ipynb"
- User-Defined Functions: "dev_guides/udfs.md"
- Settings Validation:
- Settings Validation Overview: "dev_guides/settings_validation/settings_validation_overview.md"
- Extending the Settings Validator: "dev_guides/settings_validation/extending_settings_validator.md"
- Dependency Compatibility Policy: "dev_guides/dependency_compatibility_policy.md"
- Blog:
- blog/index.md
extra_css:
- css/custom.css
- css/neoteroi-mkdocs.css
extra_javascript:
- javascripts/mathjax.js
- https://polyfill.io/v3/polyfill.min.js?features=es6
- https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js
- https://cdn.jsdelivr.net/npm/vega@5
- https://cdn.jsdelivr.net/npm/vega-lite@5
- https://cdn.jsdelivr.net/npm/vega-embed@6
extra:
analytics:
provider: google
property: G-L6JT8NK528
social:
- icon: fontawesome/brands/github
link: https://github.com/moj-analytical-services/splink
- icon: fontawesome/brands/python
link: https://pypi.org/project/splink/
- icon: fontawesome/solid/chevron-right
link: https://www.robinlinacre.com/
- icon: fontawesome/solid/rss
link: https://moj-analytical-services.github.io/splink/feed_rss_created.xml
new: Recently added