Merge branch 'develop' into electra_inputs_embeds
sijunhe authored Oct 13, 2022
2 parents c18cd26 + c65dbb4 commit 57aadef
Showing 18 changed files with 721 additions and 63 deletions.
18 changes: 16 additions & 2 deletions applications/question_answering/faq_finance/README.md
@@ -399,10 +399,24 @@ python milvus_ann_search.py --data_path data/qa_pair.csv \

#### Paddle Serving Deployment

Paddle Serving installation is covered in the [Paddle Serving installation guide](https://github.com/PaddlePaddle/Serving#installation). The relevant dependencies need to be installed on both the server and the client; once they are installed, you can run the steps below.
Paddle Serving installation is covered in the [Paddle Serving installation guide](https://github.com/PaddlePaddle/Serving#installation). The relevant dependencies need to be installed on both the server and the client. Install the Paddle Serving dependencies with pip as follows:

```
pip install paddle-serving-client==0.8.3 -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install paddle-serving-app==0.8.3 -i https://pypi.tuna.tsinghua.edu.cn/simple
# For CPU deployment, only the CPU server package needs to be installed
pip install paddle-serving-server==0.8.3 -i https://pypi.tuna.tsinghua.edu.cn/simple
First, export the generated static graph model to the Paddle Serving format with the following command:
# For a GPU server, check your environment first to decide which command to run; the CUDA 10.2 package is recommended
# CUDA 10.2 + cuDNN 7 + TensorRT 6 (recommended)
pip install paddle-serving-server-gpu==0.8.3.post102 -i https://pypi.tuna.tsinghua.edu.cn/simple
# CUDA 10.1 + TensorRT 6
pip install paddle-serving-server-gpu==0.8.3.post101 -i https://pypi.tuna.tsinghua.edu.cn/simple
# CUDA 11.2 + TensorRT 8
pip install paddle-serving-server-gpu==0.8.3.post112 -i https://pypi.tuna.tsinghua.edu.cn/simple
```
For more detailed installation instructions, see [this guide](https://github.com/PaddlePaddle/Serving/blob/v0.9.0/doc/Install_Linux_Env_CN.md). Once the dependencies are installed, you can run the steps below. First, export the generated static graph model to the Paddle Serving format with the following command:

```
python export_to_serving.py \
    ...
```
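For reference, the export step above typically relies on Paddle Serving's model conversion utility. A minimal sketch under that assumption, with the model directory and filenames (`output`, `inference.pdmodel`, `inference.pdiparams`) chosen for illustration rather than taken from this repository:

```
# Minimal sketch: convert an exported static graph model into the
# serving_server / serving_client directories consumed by Paddle Serving.
# Requires paddle-serving-client (installed with the pip commands above);
# all paths and filenames below are illustrative assumptions.
from paddle_serving_client.io import inference_model_to_serving

inference_model_to_serving(
    dirname="output",                       # directory of the exported static graph model (assumed)
    serving_server="serving_server",        # output directory read by the serving server
    serving_client="serving_client",        # output directory read by the serving client
    model_filename="inference.pdmodel",     # assumed model filename
    params_filename="inference.pdiparams",  # assumed params filename
)
```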
3 changes: 0 additions & 3 deletions applications/question_answering/faq_finance/requirements.txt
@@ -5,7 +5,4 @@ paddlepaddle-gpu>=2.2.3
hnswlib>=0.5.2
numpy>=1.17.2
visualdl>=2.2.2
paddle-serving-app>=0.7.0
paddle-serving-client>=0.7.0
paddle-serving-server-gpu>=0.7.0.post102
pybind11
23 changes: 22 additions & 1 deletion examples/machine_reading_comprehension/SQuAD/args.py
@@ -1,3 +1,17 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse


@@ -78,7 +92,7 @@ def parse_args():
help="random seed for initialization")
parser.add_argument(
'--device',
choices=['cpu', 'gpu'],
choices=['cpu', 'gpu', 'mlu'],
default="gpu",
help="Select which device to train model, defaults to gpu.")
parser.add_argument(
@@ -131,5 +145,12 @@ def parse_args():
parser.add_argument("--do_predict",
action='store_true',
help="Whether to predict.")
parser.add_argument("--use_amp",
action='store_true',
help="Whether to use AMP.")
parser.add_argument("--scale_loss",
type=float,
default=2**15,
help="The value of scale_loss for fp16.")
args = parser.parse_args()
return args
37 changes: 28 additions & 9 deletions examples/machine_reading_comprehension/SQuAD/run_squad.py
@@ -288,27 +288,46 @@ def run(args):
apply_decay_param_fun=lambda x: x in decay_params)
criterion = CrossEntropyLossForSQuAD()

if args.use_amp:
scaler = paddle.amp.GradScaler(init_loss_scaling=args.scale_loss)

global_step = 0
tic_train = time.time()

for epoch in range(num_train_epochs):
for step, batch in enumerate(train_data_loader):
global_step += 1
logits = model(input_ids=batch['input_ids'],
token_type_ids=batch['token_type_ids'],
attention_mask=batch['attention_mask'])
loss = criterion(
logits, (batch['start_positions'], batch['end_positions']))
if args.use_amp:
with paddle.amp.auto_cast(
args.use_amp,
custom_white_list=["layer_norm", "softmax",
"gelu"]):
logits = model(input_ids=batch['input_ids'],
token_type_ids=batch['token_type_ids'],
attention_mask=batch['attention_mask'])
loss = criterion(
logits,
(batch['start_positions'], batch['end_positions']))
scaler.scale(loss).backward()
scaler.minimize(optimizer, loss)
else:
logits = model(input_ids=batch['input_ids'],
token_type_ids=batch['token_type_ids'],
attention_mask=batch['attention_mask'])
loss = criterion(
logits,
(batch['start_positions'], batch['end_positions']))
loss.backward()
optimizer.step()
lr_scheduler.step()
optimizer.clear_grad()

if global_step % args.logging_steps == 0:
print(
"global step %d, epoch: %d, batch: %d, loss: %f, speed: %.2f step/s"
% (global_step, epoch + 1, step + 1, loss,
args.logging_steps / (time.time() - tic_train)))
tic_train = time.time()
loss.backward()
optimizer.step()
lr_scheduler.step()
optimizer.clear_grad()

if global_step % args.save_steps == 0 or global_step == num_training_steps:
if rank == 0:
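The change above follows Paddle's standard mixed-precision recipe: run the forward pass and loss under `paddle.amp.auto_cast`, then route the backward pass and optimizer step through a `paddle.amp.GradScaler`. A minimal self-contained sketch of that recipe, using a toy model and random data rather than the SQuAD pipeline:

```
import paddle

# Toy setup; mirrors the AMP pattern added to run_squad.py.
model = paddle.nn.Linear(16, 2)
optimizer = paddle.optimizer.AdamW(learning_rate=1e-3, parameters=model.parameters())
criterion = paddle.nn.CrossEntropyLoss()
scaler = paddle.amp.GradScaler(init_loss_scaling=2**15)  # matches the --scale_loss default

x = paddle.randn([8, 16])
y = paddle.randint(0, 2, [8])

for _ in range(3):
    # Ops in the white list may run in float16; the rest stays in float32.
    # (AMP only takes effect on supported devices; on CPU it falls back to float32.)
    with paddle.amp.auto_cast(custom_white_list=["layer_norm", "softmax", "gelu"]):
        logits = model(x)
        loss = criterion(logits, y)
    scaler.scale(loss).backward()     # scale the loss to avoid fp16 gradient underflow
    scaler.minimize(optimizer, loss)  # unscale gradients and apply the optimizer step
    optimizer.clear_grad()
```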
7 changes: 6 additions & 1 deletion model_zoo/uie/utils.py
@@ -222,6 +222,10 @@ def reader(data_path, max_seq_len=512):
if len(content) <= max_content_len:
yield json_line
else:
if result['end'] - result['start'] > max_content_len:
logger.warn(
"result['end '] - result ['start'] exceeds max_content_len, which will result in no valid instance being returned"
)
result_list = json_line['result_list']
json_lines = []
accumulate = 0
@@ -230,7 +234,8 @@ def reader(data_path, max_seq_len=512):

for result in result_list:
if result['start'] + 1 <= max_content_len < result[
'end']:
'end'] and result['end'] - result[
'start'] <= max_content_len:
max_content_len = result['start']
break

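The added condition keeps an answer span from being cut in half when a long example is split into chunks of at most `max_content_len` characters; if the span itself is longer than a chunk, no split point can preserve it, which is exactly when the new warning fires. A simplified standalone sketch of that decision, illustrative only and not the actual `reader()`:

```
def choose_split_point(result_list, max_content_len):
    """Pick a split point <= max_content_len that does not cut any span,
    mirroring (in simplified form) the boundary check added to reader()."""
    for result in result_list:
        spans_boundary = result['start'] + 1 <= max_content_len < result['end']
        fits_in_chunk = result['end'] - result['start'] <= max_content_len
        if spans_boundary and fits_in_chunk:
            # Shrink the chunk so the whole span lands in the next chunk.
            return result['start']
        if spans_boundary and not fits_in_chunk:
            # The span is longer than a whole chunk and cannot be preserved.
            print("warning: span longer than max_content_len; no valid instance can be built")
    return max_content_len

# A span at positions 450..470 is unaffected by 512-character chunks,
# but with 460-character chunks the split point moves back to 450.
print(choose_split_point([{'start': 450, 'end': 470}], 512))  # -> 512
print(choose_split_point([{'start': 450, 'end': 470}], 460))  # -> 450
```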
48 changes: 39 additions & 9 deletions paddlenlp/transformers/electra/modeling.py
@@ -23,8 +23,8 @@
from paddle.nn.layer.transformer import _convert_attention_mask

from .. import PretrainedModel, register_base_model
from ..model_outputs import (BaseModelOutput, SequenceClassifierOutput,
TokenClassifierOutput,
from ..model_outputs import (BaseModelOutputWithPastAndCrossAttentions,
SequenceClassifierOutput, TokenClassifierOutput,
QuestionAnsweringModelOutput,
MultipleChoiceModelOutput, MaskedLMOutput,
tuple_output)
@@ -153,9 +153,12 @@ def forward(self,
src_mask=src_mask,
output_attentions=output_attentions)
else:
cache_wrapper = cache[i] if isinstance(
cache[i], nn.MultiHeadAttention.Cache
) else nn.MultiHeadAttention.Cache(*cache[i])
output, new_cache = mod(output,
src_mask=src_mask,
cache=cache[i],
cache=cache_wrapper,
output_attentions=output_attentions)
new_caches.append(new_cache)
if output_attentions:
@@ -174,14 +177,13 @@ def forward(self,
if not return_dict:
if output_attentions or output_hidden_states:
output = (output, all_attentions, all_hidden_states)

return output if cache is None else (output, new_caches)

return BaseModelOutput(
return BaseModelOutputWithPastAndCrossAttentions(
last_hidden_state=output,
hidden_states=all_hidden_states,
attentions=all_attentions,
)
past_key_values=new_caches)


class ElectraEmbeddings(nn.Layer):
@@ -203,12 +205,14 @@ def forward(self,
input_ids,
token_type_ids=None,
position_ids=None,
inputs_embeds=None):

inputs_embeds=None,
past_key_values_length=None):
if position_ids is None:
ones = paddle.ones_like(input_ids, dtype="int64")
seq_length = paddle.cumsum(ones, axis=-1)
position_ids = seq_length - ones
if past_key_values_length is not None:
position_ids += past_key_values_length
position_ids.stop_gradient = True
position_ids = position_ids.astype("int64")

@@ -559,6 +563,8 @@ def forward(self,
position_ids=None,
attention_mask=None,
inputs_embeds=None,
past_key_values=None,
use_cache=None,
output_attentions=False,
output_hidden_states=False,
return_dict=False):
@@ -599,6 +605,17 @@
This is useful for use cases such as P-Tuning, where you want more control over how to convert input_ids indices
into the embedding space.
Its data type should be `float32` and it has a shape of [batch_size, sequence_length, embedding_size].
past_key_values (tuple(tuple(Tensor)), optional):
Precomputed key and value hidden states of the attention blocks of each layer. These can be used to speed up
auto-regressive decoding for generation tasks, or to support use cases such as Prefix-Tuning where vectors are prepended
to each attention layer. The length of the tuple equals the number of layers, and each inner tuple contains 2 tensors of shape
`(batch_size, num_heads, past_key_values_length, embed_size_per_head)`.
If `past_key_values` is used, the user can optionally input only the last `input_ids` (those that
don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
`input_ids` of shape `(batch_size, sequence_length)`.
use_cache (`bool`, optional):
If set to `True`, the key and value states of the attention blocks are returned and can be passed back in as `past_key_values`.
Defaults to `None`.
output_hidden_states (bool, optional):
Whether to return the hidden states of all layers.
Defaults to `False`.
@@ -627,27 +644,40 @@
output = model(**inputs)
'''
past_key_values_length = None
if past_key_values is not None:
past_key_values_length = past_key_values[0][0].shape[2]

if attention_mask is None:
attention_mask = paddle.unsqueeze(
(input_ids == self.pad_token_id).astype(
paddle.get_default_dtype()) * -1e4,
axis=[1, 2])
if past_key_values is not None:
batch_size = past_key_values[0][0].shape[0]
past_mask = paddle.zeros(
[batch_size, 1, 1, past_key_values_length],
dtype=attention_mask.dtype)
attention_mask = paddle.concat([past_mask, attention_mask],
axis=-1)
else:
if attention_mask.ndim == 2:
attention_mask = attention_mask.unsqueeze(axis=[1, 2])

embedding_output = self.embeddings(input_ids=input_ids,
position_ids=position_ids,
token_type_ids=token_type_ids,
inputs_embeds=inputs_embeds)
inputs_embeds=inputs_embeds,
past_key_values_length=past_key_values_length)

if hasattr(self, "embeddings_project"):
embedding_output = self.embeddings_project(embedding_output)

self.encoder._use_cache = use_cache # To be consistent with HF
encoder_outputs = self.encoder(
embedding_output,
attention_mask,
cache=past_key_values,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict)
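With `use_cache` and `past_key_values` threaded through `ElectraModel`, the key/value states from one forward pass can be reused in the next, e.g. for incremental decoding or prefix-style tuning. A hedged sketch of the intended call pattern, assuming the `electra-small` checkpoint name and that `use_cache=True` alone is enough to get `past_key_values` back, as the new docstring describes:

```
import paddle
from paddlenlp.transformers import ElectraModel, ElectraTokenizer

# "electra-small" is an assumed checkpoint; any ELECTRA weights with the same API would do.
tokenizer = ElectraTokenizer.from_pretrained("electra-small")
model = ElectraModel.from_pretrained("electra-small")
model.eval()

encoded = tokenizer("PaddleNLP makes NLP easy")
input_ids = paddle.to_tensor([encoded["input_ids"]])
token_type_ids = paddle.to_tensor([encoded["token_type_ids"]])

with paddle.no_grad():
    # First pass: also return the per-layer key/value cache.
    out = model(input_ids=input_ids,
                token_type_ids=token_type_ids,
                use_cache=True,
                return_dict=True)
    cache = out.past_key_values  # one (key, value) entry per layer

    # Second pass: feed only the new token and reuse the cache; position ids and
    # the attention mask are extended internally by past_key_values_length.
    next_ids = paddle.to_tensor([[tokenizer.sep_token_id]])
    out2 = model(input_ids=next_ids,
                 past_key_values=cache,
                 use_cache=True,
                 return_dict=True)
    print(out2.last_hidden_state.shape)  # [1, 1, hidden_size]
```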
2 changes: 1 addition & 1 deletion paddlenlp/transformers/gpt/modeling.py
@@ -1182,7 +1182,7 @@ def prepare_inputs_for_generation(self,
# only last token for inputs_ids if cache is defined in kwargs
position_ids = kwargs.get("position_ids", None)
attention_mask = kwargs.get("attention_mask", None)
if attention_mask is not None and len(attention_mask.shape) == 4:
if attention_mask is not None and attention_mask.ndim == 4:
attention_mask = attention_mask[:, -1:, -1:, :]
if cache is not None:
input_ids = input_ids[:, -1].unsqueeze(-1)
16 changes: 16 additions & 0 deletions pipelines/examples/semantic-search/semantic_search_example.py
@@ -209,6 +209,22 @@ def semantic_search_tutorial():
})

print_documents(prediction)
# Batch prediction
predictions = pipe.run_batch(queries=["亚马逊河流的介绍", '期货交易手续费指的是什么?'],
params={
"Retriever": {
"top_k": 50
},
"Ranker": {
"top_k": 5
}
})
for i in range(len(predictions['queries'])):
result = {
'documents': predictions['documents'][i],
'query': predictions['queries'][i]
}
print_documents(result)


if __name__ == "__main__":
31 changes: 24 additions & 7 deletions pipelines/pipelines/nodes/base.py
@@ -127,16 +127,33 @@ def _dispatch_run(self, **kwargs) -> Tuple[Dict, str]:
- collate `_debug` information if present
- merge component output with the preceding output and pass it on to the subsequent Component in the Pipeline
"""
return self._dispatch_run_general(self.run, **kwargs)

def _dispatch_run_batch(self, **kwargs):
"""
The Pipelines call this method when run_batch() is executed. This method in turn calls
_dispatch_run_general() with the appropriate run method.
"""
return self._dispatch_run_general(self.run_batch, **kwargs)

def _dispatch_run_general(self, run_method: Callable, **kwargs):
"""
This method takes care of the following:
- inspect run_method's signature to validate that all necessary arguments are available
- pop `debug` and set it on the instance to control debug output
- call run_method with the corresponding arguments and gather output
- collate `_debug` information if present
- merge component output with the preceding output and pass it on to the subsequent Component in the Pipeline
"""
arguments = deepcopy(kwargs)
params = arguments.get("params") or {}

run_signature_args = inspect.signature(self.run).parameters.keys()
run_signature_args = inspect.signature(run_method).parameters.keys()

run_params: Dict[str, Any] = {}
for key, value in params.items():
if key == self.name: # targeted params for this node
if isinstance(value, dict):

# Extract debug attributes
if "debug" in value.keys():
self.debug = value.pop("debug")
@@ -156,19 +173,19 @@ def _dispatch_run(self, **kwargs) -> Tuple[Dict, str]:
if key in run_signature_args:
run_inputs[key] = value

output, stream = self.run(**run_inputs, **run_params)
output, stream = run_method(**run_inputs, **run_params)

# Collect debug information
debug_info = {}
if getattr(self, "debug", None):
# Include input
debug_info["input"] = {**run_inputs, **run_params}
debug_info["input"]["debug"] = self.debug
# Include output
# Include output, exclude _debug to avoid recursion
filtered_output = {
key: value
for key, value in output.items() if key != "_debug"
} # Exclude _debug to avoid recursion
}
debug_info["output"] = filtered_output
# Include custom debug info
custom_debug = output.get("_debug", {})
@@ -182,9 +199,9 @@ def _dispatch_run(self, **kwargs) -> Tuple[Dict, str]:
if all_debug:
output["_debug"] = all_debug

# add "extra" args that were not used by the node
# add "extra" args that were not used by the node, but not the 'inputs' value
for k, v in arguments.items():
if k not in output.keys():
if k not in output.keys() and k != "inputs":
output[k] = v

output["params"] = params
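Stripped of Pipelines-specific details, the dispatch logic above does three things: pick `run` or `run_batch`, keep only the kwargs and node-targeted params that the chosen method accepts, and merge the method's output with any unused arguments before handing it to the next node. A standalone toy sketch of that routing, illustrative only and not the BaseComponent API:

```
import inspect
from typing import Any, Callable, Dict


class ToyNode:
    name = "Retriever"

    def run(self, query: str, top_k: int = 10):
        return {"documents": [f"doc for {query}"][:top_k]}, "output_1"

    def run_batch(self, queries, top_k: int = 10):
        return {"documents": [[f"doc for {q}"][:top_k] for q in queries]}, "output_1"

    def _dispatch(self, run_method: Callable, **kwargs):
        params: Dict[str, Any] = kwargs.pop("params", None) or {}
        # Keep only arguments the chosen run method accepts, plus params targeted at this node.
        accepted = inspect.signature(run_method).parameters.keys()
        node_params = {k: v for k, v in params.get(self.name, {}).items() if k in accepted}
        run_inputs = {k: v for k, v in kwargs.items() if k in accepted}
        output, stream = run_method(**run_inputs, **node_params)
        # Pass unused arguments through so downstream nodes still see them.
        for k, v in kwargs.items():
            output.setdefault(k, v)
        output["params"] = params
        return output, stream


node = ToyNode()
print(node._dispatch(node.run, query="intro to the Amazon river", params={"Retriever": {"top_k": 5}}))
print(node._dispatch(node.run_batch, queries=["q1", "q2"], params={"Retriever": {"top_k": 5}}))
```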