Skip to content

Commit

Permalink
Start working on TensorFlow 2.0 compatibility
Browse files Browse the repository at this point in the history
  • Loading branch information
gpengzhi committed Nov 22, 2019
1 parent 676c66a commit e59f354
Show file tree
Hide file tree
Showing 355 changed files with 241 additions and 56,877 deletions.
32 changes: 24 additions & 8 deletions .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ ignore=CVS

# Add files or directories matching the regex patterns to the blacklist. The
# regex matches against base names, not paths.
ignore-patterns=
ignore-patterns=.*_test.py

# Pickle collected data for later comparisons.
persistent=yes
Expand Down Expand Up @@ -65,7 +65,20 @@ confidence=
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W"
disable=print-statement,parameter-unpacking,unpacking-in-except,old-raise-syntax,backtick,import-star-module-level,apply-builtin,basestring-builtin,buffer-builtin,cmp-builtin,coerce-builtin,execfile-builtin,file-builtin,long-builtin,raw_input-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,no-absolute-import,old-division,dict-iter-method,dict-view-method,next-method-called,metaclass-assignment,indexing-exception,raising-string,reload-builtin,oct-method,hex-method,nonzero-method,cmp-method,input-builtin,round-builtin,intern-builtin,unichr-builtin,map-builtin-not-iterating,zip-builtin-not-iterating,range-builtin-not-iterating,filter-builtin-not-iterating,using-cmp-argument,long-suffix,old-ne-operator,old-octal-literal,suppressed-message,useless-suppression
disable=invalid-name,too-many-branches,too-many-statements,too-many-arguments, # Unnecessarily strict checks
too-many-instance-attributes,too-few-public-methods,too-many-locals,
too-many-lines,too-many-return-statements,too-many-boolean-expressions,
too-many-ancestors,
fixme,
no-else-return,no-else-raise,len-as-condition,unnecessary-pass, # Not exactly good conventions
bad-continuation,inconsistent-return-statements,stop-iteration-return,
no-member,not-callable,invalid-unary-operand-type,arguments-differ, # We have mypy for this
no-name-in-module,unsubscriptable-object,import-error,
access-member-before-definition,
redefined-builtin,abstract-method,missing-docstring, # Too many false positives
no-self-use, # (cannot ignore overridden methods)
unused-wildcard-import, # (https://github.com/rogalski/astroid/commit/82c6ef644a2efb77217a23d9b8a6cfb5caffb4ba)
duplicate-code, # (will be fixed in next release)


[REPORTS]
Expand All @@ -82,7 +95,7 @@ output-format=text
files-output=no

# Tells whether to display a full report or only the messages
reports=yes
reports=no

# Python expression which should return a note less than 10 (10 is the highest
# note). You have access to the variables errors warning, statement which
Expand Down Expand Up @@ -253,7 +266,7 @@ ignore-comments=yes
ignore-docstrings=yes

# Ignore imports when computing similarities.
ignore-imports=no
ignore-imports=yes


[VARIABLES]
Expand All @@ -263,7 +276,7 @@ init-import=no

# A regular expression matching the name of dummy variables (i.e. expectedly
# not used).
dummy-variables-rgx=(_+[a-zA-Z0-9]*?$)|dummy
dummy-variables-rgx=(_+[a-zA-Z0-9]*?$)|dummy|unused

# List of additional names supposed to be defined in builtins. Remember that
# you should avoid defining new builtins when possible.
Expand Down Expand Up @@ -291,7 +304,8 @@ logging-modules=logging
max-line-length=80

# Regexp for a line that is allowed to be longer than the limit.
ignore-long-lines=^\s*(# )?<?https?://\S+>?$
# This regex matches URLs and link anchors.
ignore-long-lines=^\s*((# )?`?<?https?://\S+>?|\.\. _`.*`:|`.*`_)$

# Allow the body of an if to be on the same line as the test if there is no
# else.
Expand All @@ -314,7 +328,7 @@ indent-string=' '
indent-after-paren=4

# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
expected-line-ending-format=
expected-line-ending-format=LF


[DESIGN]
Expand Down Expand Up @@ -367,7 +381,7 @@ valid-metaclass-classmethod-first-arg=mcs

# List of member names, which should be excluded from the protected access
# warning.
exclude-protected=_asdict,_fields,_replace,_source,_make
exclude-protected=_asdict,_fields,_replace,_source,_make,_get_name


[IMPORTS]
Expand Down Expand Up @@ -399,6 +413,8 @@ known-third-party=enchant
# only in one or another interpreter, leading to false positives when analysed.
analyse-fallback-blocks=no

allow-wildcard-with-all=yes


[EXCEPTIONS]

Expand Down
2 changes: 1 addition & 1 deletion .readthedocs.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
version: 2

python:
version: 3.6
version: 3.7
install:
- requirements: docs/requirements.txt
27 changes: 15 additions & 12 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,29 @@ python:
install:
- pip install --upgrade pip
- pip install --progress-bar off .[tensorflow-cpu]
- pip install flake8==3.7.7
- pip install pylint==2.3.1 flake8==3.7.7
- pip install pytest

script:
# Linting
- pylint texar/ examples/
- flake8 texar/ examples/
# Unit tests
- pytest

jobs:
include:
- stage: docs
python: "3.7"
install:
- pip install --upgrade pip
- pip install --progress-bar off -r docs/requirements.txt
script:
- cd docs
# Build documents
- sphinx-build -b html -d _build/doctrees . _build/html
# jobs:
# include:
# - stage: docs
# python: "3.7"
# install:
# - pip install --upgrade pip
# - pip install --progress-bar off -r docs/requirements.txt
# script:
# - cd docs
# # Build documents
# - sphinx-build -W -b html -d _build/doctrees . _build/html
# # Check for typos
# - sphinx-build -W -b spelling -d _build/doctrees . _build/spelling

notifications:
email: false
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
### New features

* Support only Python 3.6 and 3.7. Drop support of older Python versions.
* Support TensorFlow 2.0.

### Feature improvements

Expand Down
4 changes: 2 additions & 2 deletions LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -178,15 +178,15 @@
APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright {yyyy} {name of copyright owner}
Copyright [yyyy] [name of copyright owner]

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
25 changes: 18 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@
* **Two Versions, (Mostly) Same Interfaces**. Texar-TensorFlow (this repo) and **[Texar-PyTorch](https://github.com/asyml/texar-pytorch)** have mostly the same interfaces. Both further combine the best design of TF and PyTorch:
- Interfaces and variable sharing in *PyTorch convention*
- Excellent factorization and rich functionalities in *TF convention*.
* **Rich Pre-trained Models, Rich Usage with Uniform Interfaces**. BERT, GPT2, XLNet, etc, for encoding, classification, generation, and composing complex models with other Texar components!
* **Fully Customizable** at multiple abstraction levels -- both novice-friendly and expert-friendly.
- Free to plug in whatever external modules, since Texar is fully compatible with the native TF/PyTorch APIs.
* **Versatile** to support broad tasks, models, algorithms, data processing, evaluation, etc.
* **Versatile** to support broad needs:
- data processing, model architectures, loss functions, training and inference algorithms, evaluation, ...
- encoder(s) to decoder(s), sequential- and self-attentions, memory, hierarchical models, classifiers...
- maximum likelihood learning, reinforcement learning, adversarial learning, probabilistic modeling, ...
* **Fully Customizable** at multiple abstraction levels -- both novice-friendly and expert-friendly.
- Free to plug in whatever external modules, since Texar is fully compatible with the native TensorFlow/PyTorch APIs.
* **Modularized** for maximal re-use and clean APIs, based on principled decomposition of *Learning-Inference-Model Architecture*.
* **Rich Pre-trained Models, Rich Usage with Uniform Interfaces**. BERT, GPT2, XLNet, etc, for encoding, classification, generation, and composing complex models with other Texar components!
* **Distributed** model training with multiple GPUs.
* Clean, detailed [documentation](https://texar.readthedocs.io) and rich [examples](./examples).

Expand All @@ -33,8 +34,13 @@
<img src="./docs/_static/img/texar_stack.png"><br><br>
</div>

<div align="center">
<img src="./docs/_static/img/texar_modules_big.png"><br><br>
</div>

### Library API Example
Builds an encoder-decoder model, with maximum likelihood learning:

```python
import texar.tf as tx

Expand Down Expand Up @@ -72,6 +78,7 @@ outputs_bs, _, _ = tx.modules.beam_search_decode(
end_token=data.target_vocab.eos_token_id)
```
The same model, but with adversarial learning:

```python
helper = tx.modules.GumbelSoftmaxTraingHelper( # Gumbel-softmax decoding
start_tokens=[BOS]*batch_size, end_token=EOS, embedding=embedder)
Expand All @@ -85,6 +92,7 @@ G_loss, D_loss = tx.losses.binary_adversarial_losses(
discriminator_fn=discriminator)
```
The same model, but with RL policy gradient learning:

```python
agent = tx.agents.SeqPGAgent(samples=outputs.sample_id,
logits=outputs.logits,
Expand All @@ -99,16 +107,18 @@ Many more examples are available [here](./examples)

Texar requires:

* `tensorflow >= 1.10.0 (but < 2.0.0)`. Follow the [tensorflow official instructions](https://www.tensorflow.org/install) to install the appropriate version
* `tensorflow_probability >= 0.3.0 (but < 0.8.0)`. Follow the [tensorflow_probability official instructions](https://www.tensorflow.org/probability/install) to install.
* `tensorflow >= 2.0.0`. Follow the [tensorflow official instructions](https://www.tensorflow.org/install) to install the appropriate version
* `tensorflow_probability >= 0.3.0`. Follow the [tensorflow_probability official instructions](https://www.tensorflow.org/probability/install) to install.

After `tensorflow` and `tensorflow_probability` are installed, install Texar from PyPI:

```bash
pip install texar
```

To use cutting-edge features or develop locally, install from source:
```

```bash
git clone https://github.com/asyml/texar.git
cd texar
pip install .
Expand All @@ -120,6 +130,7 @@ pip install .

### Reference
If you use Texar, please cite the [tech report](https://arxiv.org/abs/1809.00794) with the following BibTeX entry:

```
Texar: A Modularized, Versatile, and Extensible Toolkit for Text Generation
Zhiting Hu, Haoran Shi, Bowen Tan, Wentao Wang, Zichao Yang, Tiancheng Zhao, Junxian He, Lianhui Qin, Di Wang, Xuezhe Ma, Zhengzhong Liu, Xiaodan Liang, Wanrong Zhu, Devendra Sachan and Eric Xing
Expand Down
Binary file added docs/_static/img/texar_modules_big.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
104 changes: 0 additions & 104 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,107 +3,3 @@
Rich examples are included to demonstrate the use of Texar. The implementations of cutting-edge models/algorithms also provide references for reproducibility and comparisons.

More examples are continuously added...

## Examples by Models/Algorithms ##

### RNN / Seq2seq ###

* [language_model_ptb](./language_model_ptb): Basic RNN language model
* [distributed_gpu](./distributed_gpu): Basic RNN language model with distributed training
* [seq2seq_attn](./seq2seq_attn): Attentional seq2seq
* [seq2seq_configs](./seq2seq_configs): Seq2seq implemented with Texar model template
* [seq2seq_rl](./seq2seq_rl): Attentional seq2seq trained with policy gradient
* [seq2seq_exposure_bias](./seq2seq_exposure_bias): Various algorithms tackling exposure bias in sequence generation
* [hierarchical_dialog](./hierarchical_dialog): Hierarchical recurrent encoder-decoder model for conversation response generation
* [torchtext](./torchtext): Use of torchtext data loader

### Transformer (Self-attention) ###

* [transformer](./transformer): Transformer for machine translation
* [bert](./bert): Pre-trained BERT model for text representation
* [gpt-2](./gpt-2): Pre-trained OpenAI GPT-2 language model
* [vae_text](./vae_text): VAE with a transformer decoder for improved language modeling

### Variational Autoencoder (VAE) ###

* [vae_text](./vae_text): VAE language model

### GANs / Discriminator-supervision ###

* [seqGAN](./seqgan): GANs for text generation
* [text_style_transfer](./text_style_transfer): Discriminator supervision for controlled text generation

### Reinforcement Learning ###

* [seq2seq_rl](./seq2seq_rl): Attentional seq2seq trained with policy gradient.
* [seqGAN](./seqgan): Policy gradient for sequence generation
* [rl_gym](./rl_gym): Various RL algorithms for games on OpenAI Gym

### Memory Network ###

* [memory_network_lm](./memory_network_lm): End-to-end memory network for language modeling

### Classifier / Sequence Prediction ###

* [bert](./bert): Pre-trained BERT model for text representation
* [sentence_classifier](./sentence_classifier): Basic CNN-based sentence classifier
* [sequence_tagging](./sequence_tagging): BiLSTM-CNN model for Named Entity Recognition (NER)

### Reward Augmented Maximum Likelihood (RAML) ###

* [seq2seq_exposure_bias](./seq2seq_exposure_bias): RAML and other learning algorithms for sequence generation

---

## Examples by Tasks

### Language Modeling ###

* [gpt-2](./gpt-2): Pre-trained OpenAI GPT-2 language model
* [language_model_ptb](./language_model_ptb): Basic RNN language model
* [vae_text](./vae_text): VAE language model
* [seqGAN](./seqgan): GAN + policy gradient
* [memory_network_lm](./memory_network_lm): End-to-end memory network for language modeling

### Machine Translation ###

* [seq2seq_attn](./seq2seq_attn): Attentional seq2seq
* [seq2seq_configs](./seq2seq_configs): Seq2seq implemented with Texar model template.
* [seq2seq_rl](./seq2seq_rl): Attentional seq2seq trained with policy gradient.
* [seq2seq_exposure_bias](./seq2seq_exposure_bias): Various algorithms tackling exposure bias in sequence generation (MT and summarization as examples).
* [transformer](./transformer): Transformer for machine translation

### Dialog ###

* [hierarchical_dialog](./hierarchical_dialog): Hierarchical recurrent encoder-decoder model for conversation response generation.

### Text Summarization ###

* [seq2seq_exposure_bias](./seq2seq_exposure_bias): Various algorithms tackling exposure bias in sequence generation (MT and summarization as examples).

### Text Style Transfer ###

* [text_style_transfer](./text_style_transfer): Discriminator supervision for controlled text generation

### Classification ###

* [bert](./bert): Pre-trained BERT model for text representation
* [sentence_classifier](./sentence_classifier): Basic CNN-based sentence classifier

### Sequence Tagging ###

* [sequence_tagging](./sequence_tagging): BiLSTM-CNN model for Named Entity Recognition (NER)

### Games ###

* [rl_gym](./rl_gym): Various RL algorithms for games on OpenAI Gym

---

## MISC ##

### Distributed training ###

* [distributed_gpu](./distributed_gpu): Basic example of distributed training.
* [bert](./bert): Distributed training of BERT.

5 changes: 0 additions & 5 deletions examples/bert/.gitignore

This file was deleted.

Loading

0 comments on commit e59f354

Please sign in to comment.