From d9b3f61979574b56df43326913232e01687d5cc1 Mon Sep 17 00:00:00 2001 From: sspaeti Date: Wed, 28 Sep 2022 12:15:28 +0200 Subject: [PATCH 1/4] removing existing glossary of terms and link to new data glossary --- docs/cloud/core-concepts.md | 7 +- docs/understanding-airbyte/glossary.md | 96 -------------------------- docusaurus/docusaurus.config.js | 49 ++++++++++++- docusaurus/sidebars.js | 1 - 4 files changed, 54 insertions(+), 99 deletions(-) delete mode 100644 docs/understanding-airbyte/glossary.md diff --git a/docs/cloud/core-concepts.md b/docs/cloud/core-concepts.md index 55a2270f44e8..d64315393db4 100644 --- a/docs/cloud/core-concepts.md +++ b/docs/cloud/core-concepts.md @@ -153,4 +153,9 @@ After a sync is complete, Airbyte normalizes the data. When setting up a connect A workspace is a grouping of sources, destinations, connections, and other configurations. It lets you collaborate with team members and share resources across your team under a shared billing account. -When you [sign up](http://cloud.airbyte.io/signup) for Airbyte Cloud, we automatically create your first workspace where you are the only user with access. You can set up your sources and destinations to start syncing data and invite other users to join your workspace. \ No newline at end of file +When you [sign up](http://cloud.airbyte.io/signup) for Airbyte Cloud, we automatically create your first workspace where you are the only user with access. You can set up your sources and destinations to start syncing data and invite other users to join your workspace. + +## Glossary of Terms + +You can find an extended list of [Airbyte-specific terms](https://glossary.airbyte.com/term/airbyte-glossary-of-terms/), [data engineering concepts](https://glossary.airbyte.com/term/data-engineering-concepts), and many [other data-related terms](https://glossary.airbyte.com/) in the Airbyte Data Glossary. 
+ diff --git a/docs/understanding-airbyte/glossary.md b/docs/understanding-airbyte/glossary.md deleted file mode 100644 index 187bd806a28c..000000000000 --- a/docs/understanding-airbyte/glossary.md +++ /dev/null @@ -1,96 +0,0 @@ -# Glossary of Terms - -### Airbyte CDK - -The Airbyte CDK (Connector Development Kit) allows you to create connectors for Sources or Destinations. -If your source or destination doesn't exist, you can use the CDK to make the building process a lot easier. -It generates all the tests and files you need and all you need to do is write the connector-specific code -for your source or destination. We created one in Python which you can check out [here](../connector-development/cdk-python/) -and the Faros AI team created a Javascript/Typescript one that you can check out [here](../connector-development/cdk-faros-js.md). - -### DAG - -DAG stands for **Directed Acyclic Graph**. It's a term originally coined by math graph theorists that -describes a tree-like process that cannot contain loops. For example, in the following diagram, you start -at A and can choose B or C, which then proceed to D and E, respectively. This kind of structure is great -for representing workflows and is what tools like [Airflow](https://airflow.apache.org) use to orchestrate -the execution of software based on different cases or states. - - - -![](../.gitbook/assets/glossary_dag_example.png) - -### ETL/ELT - -Stands for **E**xtract, **T**ransform, and **L**oad and **E**xtract, **L**oad, and **T**ransform, respectively. - -**Extract**: Retrieve data from a [source](../integrations/README.md#Sources), which can be an application, database, anything really. - -**Load**: Move data to your [destination](../integrations/README.md#Destinations). - -**Transform**: Clean up the data. This is referred to as [normalization](basic-normalization.md) -in Airbyte and involves [deduplication](connections/incremental-deduped-history.md), changing data types, formats, and more. 
- -### Full Refresh Sync - -A **Full Refresh Sync** will attempt to retrieve all data from the source every time a sync is run. -Then there are two choices, **Overwrite** and **Append**. **Overwrite** deletes the data in the destination before running the -sync and **Append** doesn't. - -### Incremental Sync - -An **Incremental Sync** will only retrieve new data from the source when a sync occurs. -The first sync will always attempt to retrieve all the data. -If the [destination supports it](https://discuss.airbyte.io/t/what-destinations-support-the-incremental-deduped-sync-mode/89), -you can have your data deduplicated. Simply put, this just means that if you sync an updated -version of a record you've already synced, it will remove the old record. - -### Partial Success - -A **Partial Success** indicates that some records were successfully committed to the destination during a sync, -even when the overall sync status was reported as a failure. - -### Raw Tables - -Airbyte spits out tables with the prefix `_airbyte_raw_`. This is your replicated data, but the prefix -indicates that it's not normalized. If you select basic normalization, Airbyte will create renamed versions without the prefix. - -## Advanced Terms - -### AirbyteCatalog - -:::info - -This is only relevant for individuals who want to create a connector. - -::: - -This refers to how you define the data that you can retrieve from a Source. For example, -if you want to retrieve information from an API, the data that you can receive needs to be -defined clearly so that Airbyte can have a clear expectation of what endpoints are supported -and what the objects that the streams return look like. This is represented as a sort of schema -that Airbyte can interpret. Learn more [here](beginners-guide-to-catalog.md). - -### Airbyte Specification - -:::info - -This is only relevant for individuals who want to create a connector. 
- -::: - -This refers to the functions that a Source or Destination must implement to successfully -retrieve data and load it, respectively. Implementing these functions using the Airbyte -Specification makes a Source or Destination work correctly. Learn more [here](airbyte-protocol.md). - -### Temporal - -:::info - -This is only relevant for individuals who want to learn about or contribute to our underlying platform. - -::: - -[Temporal](https://temporal.io) is a development kit that lets you create workflows, -parallelize them, and handle failures/retries gracefully. We use it to reliably schedule each step -of the ELT process, and a Temporal service is always deployed with each Airbyte installation. diff --git a/docusaurus/docusaurus.config.js b/docusaurus/docusaurus.config.js index fac11b0747b0..8b1d649404b4 100644 --- a/docusaurus/docusaurus.config.js +++ b/docusaurus/docusaurus.config.js @@ -49,7 +49,54 @@ const config = { from: '/operator-guides/securing-airbyte', to: '/operator-guides/security', }, - + // { + // from: '/understanding-airbyte/glossary', + // to: 'https://glossary.airbyte.com/term/airbyte-glossary-of-terms/', + // }, + // { + // from: '/understanding-airbyte/glossary#advanced-terms', + // to: 'https://glossary.airbyte.com/term/airbyte-glossary-of-terms/#advanced-terms', + // }, + // { + // from: '/understanding-airbyte/glossary#airbyte-cdk', + // to: 'https://glossary.airbyte.com/term/airbyte-cdk/', + // }, + // { + // from: '/understanding-airbyte/glossary#dag', + // to: 'https://glossary.airbyte.com/term/dag-directed-acyclic-graph/', + // }, + // { + // from: '/understanding-airbyte/glossary#etlelt', + // to: 'https://glossary.airbyte.com/term/etl-elt-airbyte/', + // }, + // { + // from: '/understanding-airbyte/glossaryfull-refresh-sync', + // to: 'https://glossary.airbyte.com/term/full-refresh-synchronization/', + // }, + // { + // from: '/understanding-airbyte/glossary#incremental-sync', + // to: 
'https://glossary.airbyte.com/term/incremental-synchronization/', + // }, + // { + // from: '/understanding-airbyte/glossary#partial-success', + // to: 'https://glossary.airbyte.com/term/partial-success', + // }, + // { + // from: '/understanding-airbyte/glossary#raw-tables', + // to: 'https://glossary.airbyte.com/term/raw-tables/', + // }, + // { + // from: '/understanding-airbyte/glossary#airbytecatalog', + // to: 'https://glossary.airbyte.com/term/airbyte-catalog/', + // }, + // { + // from: '/understanding-airbyte/glossary#airbyte-specification', + // to: 'https://glossary.airbyte.com/term/airbyte-specification/', + // }, + // { + // from: '/understanding-airbyte/glossary#temporal', + // to: 'https://glossary.airbyte.com/term/temporal/', + // }, // { // from: '/some-lame-path', // to: '/a-much-cooler-uri', diff --git a/docusaurus/sidebars.js b/docusaurus/sidebars.js index 0de147739641..77b1a246f02a 100644 --- a/docusaurus/sidebars.js +++ b/docusaurus/sidebars.js @@ -274,7 +274,6 @@ module.exports = { 'understanding-airbyte/namespaces', 'understanding-airbyte/supported-data-types', 'understanding-airbyte/json-avro-conversion', - 'understanding-airbyte/glossary', ] }, { From 27569ae1b9b2fc1280a95f99d941c02ffca6d298 Mon Sep 17 00:00:00 2001 From: sspaeti Date: Wed, 28 Sep 2022 14:22:47 +0200 Subject: [PATCH 2/4] adding databricks tutorial to docs --- docs/integrations/destinations/databricks.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/integrations/destinations/databricks.md b/docs/integrations/destinations/databricks.md index 0c02b53fe02a..6821168aa0b5 100644 --- a/docs/integrations/destinations/databricks.md +++ b/docs/integrations/destinations/databricks.md @@ -101,6 +101,9 @@ Under the hood, an Airbyte data stream in Json schema is first converted to an A 2. Credentials for an S3 bucket. See [documentation](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys). 3. 
Grant the Databricks cluster full access to the S3 bucket. Or mount it as Databricks File System \(DBFS\). See [documentation](https://docs.databricks.com/data/data-sources/aws/amazon-s3.html). +## Related tutorial +If you want to learn more about the Databricks connector, or about how the Delta Lake tables are created, see the tutorial [How to Load Data into Delta Lake on Databricks Lakehouse](https://airbyte.com/tutorials/load-data-into-delta-lake-on-databricks-lakehouse). + ## CHANGELOG | Version | Date | Pull Request | Subject | From e62a164e2b1b2fe29f9b8ba2e464f584371bc2fd Mon Sep 17 00:00:00 2001 From: sspaeti Date: Wed, 28 Sep 2022 15:41:02 +0200 Subject: [PATCH 3/4] missing # --- docusaurus/docusaurus.config.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docusaurus/docusaurus.config.js b/docusaurus/docusaurus.config.js index 8b1d649404b4..9a8f7af7d44a 100644 --- a/docusaurus/docusaurus.config.js +++ b/docusaurus/docusaurus.config.js @@ -70,7 +70,7 @@ const config = { // to: 'https://glossary.airbyte.com/term/etl-elt-airbyte/', // }, // { - // from: '/understanding-airbyte/glossaryfull-refresh-sync', + // from: '/understanding-airbyte/glossary#full-refresh-sync', // to: 'https://glossary.airbyte.com/term/full-refresh-synchronization/', // }, // { From 41687888cf55ce1210bf1edcf1d0c88e782eee07 Mon Sep 17 00:00:00 2001 From: sspaeti Date: Mon, 3 Oct 2022 12:24:36 +0200 Subject: [PATCH 4/4] remove redirects outside of docs (done on DNS side) --- docusaurus/docusaurus.config.js | 48 --------------------------------- 1 file changed, 48 deletions(-) diff --git a/docusaurus/docusaurus.config.js b/docusaurus/docusaurus.config.js index 9a8f7af7d44a..b337880e0f1b 100644 --- a/docusaurus/docusaurus.config.js +++ b/docusaurus/docusaurus.config.js @@ -49,54 +49,6 @@ const config = { from: '/operator-guides/securing-airbyte', to: '/operator-guides/security', }, - // { - // from: 
'/understanding-airbyte/glossary', - // to: 'https://glossary.airbyte.com/term/airbyte-glossary-of-terms/', - // }, - // { - // from: '/understanding-airbyte/glossary#advanced-terms', - // to: 'https://glossary.airbyte.com/term/airbyte-glossary-of-terms/#advanced-terms', - // }, - // { - // from: '/understanding-airbyte/glossary#airbyte-cdk', - // to: 'https://glossary.airbyte.com/term/airbyte-cdk/', - // }, - // { - // from: '/understanding-airbyte/glossary#dag', - // to: 'https://glossary.airbyte.com/term/dag-directed-acyclic-graph/', - // }, - // { - // from: '/understanding-airbyte/glossary#etlelt', - // to: 'https://glossary.airbyte.com/term/etl-elt-airbyte/', - // }, - // { - // from: '/understanding-airbyte/glossary#full-refresh-sync', - // to: 'https://glossary.airbyte.com/term/full-refresh-synchronization/', - // }, - // { - // from: '/understanding-airbyte/glossary#incremental-sync', - // to: 'https://glossary.airbyte.com/term/incremental-synchronization/', - // }, - // { - // from: '/understanding-airbyte/glossary#partial-success', - // to: 'https://glossary.airbyte.com/term/partial-success', - // }, - // { - // from: '/understanding-airbyte/glossary#raw-tables', - // to: 'https://glossary.airbyte.com/term/raw-tables/', - // }, - // { - // from: '/understanding-airbyte/glossary#airbytecatalog', - // to: 'https://glossary.airbyte.com/term/airbyte-catalog/', - // }, - // { - // from: '/understanding-airbyte/glossary#airbyte-specification', - // to: 'https://glossary.airbyte.com/term/airbyte-specification/', - // }, - // { - // from: '/understanding-airbyte/glossary#temporal', - // to: 'https://glossary.airbyte.com/term/temporal/', - // }, // { // from: '/some-lame-path', // to: '/a-much-cooler-uri',