From 4088d61da5479fe7578511ed2afd3059cbd2341a Mon Sep 17 00:00:00 2001
From: David Chanin
Date: Thu, 23 Nov 2023 00:20:51 +0000
Subject: [PATCH] chore: add Sphinx docs (#1)

* adding a basic docs page and validating in ci

* adding deploy pages action
---
 .github/workflows/ci.yaml |  23 ++++
 docs/_static/.gitkeep     |   0
 docs/about.rst            |  34 ++++++
 docs/conf.py              |  46 ++++++++
 docs/index.rst            |  60 ++++++++++
 docs/usage.rst            | 235 ++++++++++++++++++++++++++++++++++++++
 pyproject.toml            |   2 +
 7 files changed, 400 insertions(+)
 create mode 100644 docs/_static/.gitkeep
 create mode 100644 docs/about.rst
 create mode 100644 docs/conf.py
 create mode 100644 docs/index.rst
 create mode 100644 docs/usage.rst

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 6f300d9..e17dd1e 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -24,6 +24,8 @@ jobs:
         run: poetry run pytest
       - name: build
         run: poetry build
+      - name: build docs
+        run: poetry run sphinx-build -b html docs docs/build
   release:
     needs: lint_test_and_build
     permissions:
@@ -40,6 +42,9 @@
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.11"
       - name: Semantic Release
         id: release
         uses: python-semantic-release/python-semantic-release@v8.0.7
@@ -53,3 +58,21 @@
         if: steps.release.outputs.released == 'true'
         with:
           github_token: ${{ secrets.GITHUB_TOKEN }}
+      # build and deploy docs to gh-pages
+      # do this as part of the semantic release step to ensure the version is bumped
+      - name: Install Poetry
+        uses: snok/install-poetry@v1
+        with:
+          version: 1.4.0
+      - name: Install dependencies
+        run: poetry install --no-interaction
+      - name: Sphinx build
+        run: |
+          poetry run sphinx-build docs _build
+      - name: Deploy to GitHub Pages
+        uses: peaceiris/actions-gh-pages@v3
+        with:
+          publish_branch: gh-pages
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: _build/
+          force_orphan: true
diff --git a/docs/_static/.gitkeep b/docs/_static/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/docs/about.rst b/docs/about.rst
new file mode 100644
index 0000000..e628909
--- /dev/null
+++ b/docs/about.rst
@@ -0,0 +1,34 @@
+About
+=====
+
+Acknowledgements
+----------------
+
+This library is inspired by and uses modified code from the following excellent projects:
+
+* `Locating and Editing Factual Associations in GPT <https://arxiv.org/abs/2202.05262>`_
+* `Linearity of Relation Decoding in Transformer LMs <https://arxiv.org/abs/2308.09124>`_
+
+Contributing
+------------
+
+Any contributions to improve this project are welcome! Please open an issue or pull request in the `GitHub repo <https://github.com/chanind/linear-relational>`_ with bugfixes, changes, and improvements.
+
+License
+-------
+
+Linear-Relational is released under an MIT license.
+
+Citation
+--------
+
+If you use this library in your work, please cite the following:
+
+.. code-block:: bibtex
+
+    @article{chanin2023identifying,
+      title={Identifying Linear Relational Concepts in Large Language Models},
+      author={David Chanin and Anthony Hunter and Oana-Maria Camburu},
+      journal={arXiv preprint arXiv:2311.08968},
+      year={2023}
+    }
\ No newline at end of file
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..acde6ed
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,46 @@
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+from datetime import datetime
+
+sys.path.insert(0, os.path.abspath(".."))
+
+from linear_relational import __version__
+
+# -- Project information -----------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
+
+project = "linear-relational"
+copyright = f"{datetime.today().year}, David Chanin"
+author = "David Chanin"
+
+release = __version__
+
+# -- General configuration ---------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
+
+extensions = [
+    "sphinx.ext.autodoc",
+    "sphinx.ext.doctest",
+    "sphinx.ext.viewcode",
+    "sphinx.ext.napoleon",
+    "sphinx.ext.mathjax",
+]
+
+pygments_style = "sphinx"
+
+templates_path = ["_templates"]
+exclude_patterns = [".DS_Store"]
+
+autodoc_typehints = "none"
+
+# -- Options for HTML output -------------------------------------------------
+# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
+
+html_theme = "furo"
+html_static_path = ["_static"]
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..f7297e2
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,60 @@
+Linear-Relational
+=================================================
+
+A library for working with Linear Relational Embeddings (LREs) and Linear Relational Concepts (LRCs) for LLMs in PyTorch
+
+.. image:: https://img.shields.io/pypi/v/linear-relational.svg?color=blue
+    :target: https://pypi.org/project/linear-relational
+    :alt: PyPI
+
+.. image:: https://img.shields.io/github/actions/workflow/status/chanind/linear-relational/ci.yaml?branch=main
+    :target: https://github.com/chanind/linear-relational
+    :alt: Build Status
+
+
+Installation
+------------
+Linear-Relational releases are hosted on `PyPI`_ and can be installed using ``pip`` as below:
+
+.. code-block:: bash
+
+    pip install linear-relational
+
+This library assumes you're working with PyTorch and Hugging Face Transformers.
+
+LREs and LRCs
+-------------
+
+This library provides utilities and PyTorch modules for working with LREs and LRCs. An LRE estimates the relation between a subject and object in a transformer language model (LM) as a linear map.
+
+This library assumes you're working with sentences containing a subject, relation, and object. For instance, the sentence "Lyon is located in the country of France" has the subject "Lyon", relation "located in country", and object "France". An LRE models a relation like "located in country" as a linear map consisting of a weight matrix :math:`W` and a bias term :math:`b`, mapping the activations of the subject (Lyon) at layer :math:`l_s` to the activations of the object (France) at layer :math:`l_o`:
+
+.. math::
+    LRE(s) = W s + b
+
+LREs can be inverted using a low-rank inverse, written :math:`LRE^{\dagger}`, to estimate :math:`s` from :math:`o`:
+
+.. math::
+    LRE^{\dagger}(o) = W^{\dagger}(o - b)
+
+Linear Relational Concepts (LRCs) represent a concept :math:`(r, o)` as a direction vector :math:`v` on subject tokens, and can act like a simple linear classifier. For instance, while an LRE can represent the relation "located in country", we could learn an LRC for "located in country: France", "located in country: Germany", "located in country: China", and so on. An LRC is just the result of passing an object activation into the inverse LRE equation above:
+
+.. math::
+    LRC(o) = W^{\dagger}(o - b)
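+
+As a rough sketch of this math in PyTorch (illustrative only: ``W``, ``b``, and the dimensions below are stand-ins, not the library's learned weights):
+
+.. code-block:: python
+
+    import torch
+
+    hidden_dim = 768  # e.g., GPT-2's hidden size
+    W = torch.randn(hidden_dim, hidden_dim)  # stand-in for a learned LRE weight
+    b = torch.randn(hidden_dim)  # stand-in for a learned LRE bias
+
+    def lre(s: torch.Tensor) -> torch.Tensor:
+        # LRE(s) = W s + b: map subject activations to object activations
+        return W @ s + b
+
+    def lre_inverse(o: torch.Tensor, rank: int = 50) -> torch.Tensor:
+        # LRE†(o) = W†(o - b), using a rank-truncated pseudo-inverse of W
+        U, S, Vh = torch.linalg.svd(W)
+        W_pinv = Vh[:rank].T @ torch.diag(1 / S[:rank]) @ U[:, :rank].T
+        return W_pinv @ (o - b)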
+
+For more information on LREs and LRCs, check out `these <https://arxiv.org/abs/2308.09124>`_ `papers <https://arxiv.org/abs/2311.08968>`_.
+
+
+.. toctree::
+    :maxdepth: 2
+
+    usage
+    about
+
+.. toctree::
+    :caption: Project Links
+
+    GitHub <https://github.com/chanind/linear-relational>
+    PyPI <https://pypi.org/project/linear-relational/>
+
+.. _PyPI: https://pypi.org/project/linear-relational/
diff --git a/docs/usage.rst b/docs/usage.rst
new file mode 100644
index 0000000..0200af1
--- /dev/null
+++ b/docs/usage.rst
@@ -0,0 +1,235 @@
+Usage
+=====
+This library assumes you're using PyTorch with a decoder-only generative language
+model (e.g., GPT, LLaMA) and a tokenizer from Hugging Face.
+
+Training an LRE
+'''''''''''''''
+
+To train an LRE for a relation, first collect prompts which elicit the relation.
+We provide a ``Prompt`` class to represent this data, and a ``Trainer`` class to make training
+an LRE easy. Below, we train an LRE to represent the "located in country" relation.
+
+.. code-block:: python
+
+    from transformers import GPT2LMHeadModel, GPT2TokenizerFast
+    from linear_relational import Prompt, Trainer
+
+    # We load a generative LM from Hugging Face. The LM head must be included.
+    model = GPT2LMHeadModel.from_pretrained("gpt2")
+    tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
+
+    # Prompts consist of text, an answer, and a subject.
+    # The subject must appear in the text. The answer
+    # is what the model should respond with, and corresponds to the "object".
+    prompts = [
+        Prompt("Paris is located in the country of", "France", subject="Paris"),
+        Prompt("Shanghai is located in the country of", "China", subject="Shanghai"),
+        Prompt("Kyoto is located in the country of", "Japan", subject="Kyoto"),
+        Prompt("San Jose is located in the country of", "Costa Rica", subject="San Jose"),
+    ]
+
+    trainer = Trainer(model, tokenizer)
+
+    lre = trainer.train_lre(
+        relation="located in country",
+        subject_layer=8,  # the subject layer must come before the object layer
+        object_layer=10,
+        prompts=prompts,
+    )
+
+Working with an LRE
+'''''''''''''''''''
+
+An LRE is a PyTorch module, so once an LRE is trained, we can use it to predict object activations from subject activations:
+
+.. code-block:: python
+
+    object_acts_estimate = lre(subject_acts)
+
+We can also create a low-rank estimate of the LRE:
+
+.. code-block:: python
+
+    low_rank_lre = lre.to_low_rank(50)
+    low_rank_obj_acts_estimate = low_rank_lre(subject_acts)
+
+Finally, we can invert the LRE:
+
+.. code-block:: python
+
+    inv_lre = lre.invert(rank=50)
+    subject_acts_estimate = inv_lre(object_acts)
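+
+The snippets above assume you already have subject activations on hand; the library extracts these
+internally during training. As an illustrative sketch of where ``subject_acts`` could come from
+(the layer index and subject-token lookup below are assumptions for this example, not the library's API):
+
+.. code-block:: python
+
+    import torch
+
+    text, subject = "Shanghai is located in the country of", "Shanghai"
+    inputs = tokenizer(text, return_tensors="pt")
+    with torch.no_grad():
+        outputs = model(**inputs, output_hidden_states=True)
+
+    # hidden_states[0] is the embedding output, so layer 8 is index 9.
+    # Use the hidden state at the subject's final token as the subject activation.
+    subject_token_idx = len(tokenizer(subject)["input_ids"]) - 1
+    subject_acts = outputs.hidden_states[9][0, subject_token_idx]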
+
+Training LRCs for a relation
+''''''''''''''''''''''''''''
+
+The ``Trainer`` can also create LRCs for a relation. Internally, this first trains an LRE, inverts it,
+then generates LRCs from each object in the relation. Objects refer to the answers in the prompts,
+e.g. in the example above, "France" is an object, "Japan" is an object, and so on.
+
+.. code-block:: python
+
+    from transformers import GPT2LMHeadModel, GPT2TokenizerFast
+    from linear_relational import Prompt, Trainer
+
+    # We load a generative LM from Hugging Face. The LM head must be included.
+    model = GPT2LMHeadModel.from_pretrained("gpt2")
+    tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
+
+    # Prompts consist of text, an answer, and a subject.
+    # The subject must appear in the text. The answer
+    # is what the model should respond with, and corresponds to the "object".
+    prompts = [
+        Prompt("Paris is located in the country of", "France", subject="Paris"),
+        Prompt("Shanghai is located in the country of", "China", subject="Shanghai"),
+        Prompt("Kyoto is located in the country of", "Japan", subject="Kyoto"),
+        Prompt("San Jose is located in the country of", "Costa Rica", subject="San Jose"),
+    ]
+
+    trainer = Trainer(model, tokenizer)
+
+    concepts = trainer.train_relation_concepts(
+        relation="located in country",
+        subject_layer=8,
+        object_layer=10,
+        prompts=prompts,
+        max_lre_training_samples=10,
+        inv_lre_rank=50,
+    )
+
+Causal editing
+''''''''''''''
+
+Once we have trained LRCs, we can use them to perform causal edits while the model is running.
+For instance, we can perform a causal edit to make the model output that
+"Shanghai is located in the country of France" by subtracting the
+"located in country: China" concept from "Shanghai" and adding the
+"located in country: France" concept. We can use the ``CausalEditor`` class to perform these edits.
+
+.. code-block:: python
+
+    from linear_relational import CausalEditor
+
+    concepts = trainer.train_relation_concepts(...)
+
+    editor = CausalEditor(model, tokenizer, concepts=concepts)
+
+    edited_answer = editor.swap_subject_concepts_and_predict_greedy(
+        text="Shanghai is located in the country of",
+        subject="Shanghai",
+        remove_concept="located in country: China",
+        add_concept="located in country: France",
+        edit_single_layer=8,
+        magnitude_multiplier=1.0,
+        predict_num_tokens=1,
+    )
+    print(edited_answer)  # " France"
+
+
+Single-layer vs multi-layer edits
+'''''''''''''''''''''''''''''''''
+
+Above we performed a single-layer edit, only modifying subject activations at layer 8.
+However, we may want to perform an edit at all subject layers at the same time instead.
+To do this, we can pass ``edit_single_layer=False`` to ``editor.swap_subject_concepts_and_predict_greedy()``.
+We should also reduce the ``magnitude_multiplier``, since the edit is now applied at every layer:
+too large a multiplier will drown out the rest of the activations in the model. The ``magnitude_multiplier`` is a
+hyperparameter that requires tuning depending on the model being edited.
+
+.. code-block:: python
+
+    from linear_relational import CausalEditor
+
+    concepts = trainer.train_relation_concepts(...)
+
+    editor = CausalEditor(model, tokenizer, concepts=concepts)
+
+    edited_answer = editor.swap_subject_concepts_and_predict_greedy(
+        text="Shanghai is located in the country of",
+        subject="Shanghai",
+        remove_concept="located in country: China",
+        add_concept="located in country: France",
+        edit_single_layer=False,
+        magnitude_multiplier=0.1,
+        predict_num_tokens=1,
+    )
+    print(edited_answer)  # " France"
+
+Bulk editing
+''''''''''''
+
+Edits can be performed in batches to make better use of GPU resources, using ``editor.swap_subject_concepts_and_predict_greedy_bulk()`` as below:
+
+.. code-block:: python
+
+    from linear_relational import CausalEditor, ConceptSwapAndPredictGreedyRequest
+
+    concepts = trainer.train_relation_concepts(...)
+
+    editor = CausalEditor(model, tokenizer, concepts=concepts)
+
+    swap_requests = [
+        ConceptSwapAndPredictGreedyRequest(
+            text="Shanghai is located in the country of",
+            subject="Shanghai",
+            remove_concept="located in country: China",
+            add_concept="located in country: France",
+            predict_num_tokens=1,
+        ),
+        ConceptSwapAndPredictGreedyRequest(
+            text="Berlin is located in the country of",
+            subject="Berlin",
+            remove_concept="located in country: Germany",
+            add_concept="located in country: Japan",
+            predict_num_tokens=1,
+        ),
+    ]
+    edited_answers = editor.swap_subject_concepts_and_predict_greedy_bulk(
+        requests=swap_requests,
+        edit_single_layer=False,
+        magnitude_multiplier=0.1,
+        batch_size=4,
+    )
+    print(edited_answers)  # [" France", " Japan"]
+
+Concept matching
+''''''''''''''''
+
+Learned concepts (LRCs) can act like classifiers, matching against subject activations in sentences.
+We can use the ``ConceptMatcher`` class to do this matching.
+
+.. code-block:: python
+
+    from linear_relational import ConceptMatcher
+
+    concepts = trainer.train_relation_concepts(...)
+
+    matcher = ConceptMatcher(model, tokenizer, concepts=concepts)
+
+    match_info = matcher.query("Beijing is a northern city", subject="Beijing")
+
+    print(match_info.best_match.name)  # located in country: China
+    print(match_info.best_match.score)  # 0.832
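+
+What counts as a "good" match score depends on the model and concepts, so it can be useful to apply a
+threshold before trusting a match. A minimal sketch (the ``0.5`` cutoff below is an arbitrary assumption,
+not a library default):
+
+.. code-block:: python
+
+    MATCH_THRESHOLD = 0.5  # hypothetical cutoff; tune for your model and concepts
+
+    match_info = matcher.query("Beijing is a northern city", subject="Beijing")
+    if match_info.best_match.score >= MATCH_THRESHOLD:
+        print(f"Matched concept: {match_info.best_match.name}")
+    else:
+        print("No confident concept match")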
+
+Bulk concept matching
+'''''''''''''''''''''
+
+We can perform concept matches in batches to better utilize GPU resources, using ``matcher.query_bulk()`` as below:
+
+.. code-block:: python
+
+    from linear_relational import ConceptMatcher, ConceptMatchQuery
+
+    concepts = trainer.train_relation_concepts(...)
+
+    matcher = ConceptMatcher(model, tokenizer, concepts=concepts)
+
+    match_queries = [
+        ConceptMatchQuery("Beijing is a northern city", subject="Beijing"),
+        ConceptMatchQuery("I saw him in Marseille", subject="Marseille"),
+    ]
+    matches = matcher.query_bulk(match_queries, batch_size=4)
+
+    print(matches[0].best_match.name)  # located in country: China
+    print(matches[1].best_match.name)  # located in country: France
diff --git a/pyproject.toml b/pyproject.toml
index 4dbe287..28e86b7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,6 +21,8 @@ pytest = "^7.4.3"
 torch = "^2.1.1"
 sentencepiece = "^0.1.99"
 protobuf = "^4.25.1"
+furo = "^2023.9.10"
+pygments = "^2.17.2"
 
 [build-system]
 requires = ["poetry-core"]