Skip to content

Commit

Permalink
[Neo] Neo compilation/quantization script bugfixes (deepjavalibrary#2115)
Browse files Browse the repository at this point in the history

(cherry picked from commit 88f84ba)
  • Loading branch information
a-ys authored and tosterberg committed Jul 18, 2024
1 parent 9d919a5 commit e22b1d3
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 3 deletions.
71 changes: 69 additions & 2 deletions serving/docker/partition/sm_neo_neuron_partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,13 @@
from sm_neo_utils import (InputConfiguration, CompilationFatalError,
write_error_to_file, get_neo_env_vars,
get_neo_compiler_flags, load_jumpstart_metadata)
from utils import extract_python_jar
from utils import extract_python_jar, load_properties
from properties_manager import PropertiesManager
from partition import PartitionService

PYTHON_CACHE_DIR = '/tmp/djlserving/cache'
_neuronxcc_version: Optional[str] = None
NEO_OPTIMIZED_MODEL_DIR = 'optimized_model'


def get_neuronxcc_version() -> str:
Expand Down Expand Up @@ -183,12 +184,12 @@ def initialize_partition_args_namespace(self):
is easier to construct.
"""
self.args.save_mp_checkpoint_path = self.OUTPUT_MODEL_DIRECTORY
self.args.engine = "Python"
# If skip_copy is not enabled, outputted configs are overwritten, and deployment fails.
self.args.skip_copy = True
# These attributes reflect the default values of the corresponding attributes
# in the partition argparser. PropertiesManager expects these attributes to be defined.
self.args.model_id = None
self.args.engine = None
self.args.tensor_parallel_degree = None
self.args.quantize = None

Expand Down Expand Up @@ -352,11 +353,18 @@ def construct_properties_manager_from_serving_properties(self):
Factory method used to construct a PropertiesManager from serving.properties
"""
self.args.properties_dir = self.INPUT_MODEL_DIRECTORY
self.properties[
"option.entryPoint"] = "djl_python.transformers_neuronx"
logging.debug(
"Constructing PropertiesManager from "
f"serving.properties\nargs:{self.args}\nprops:{self.properties}")
self.properties_manager = PropertiesManager(
self.args, addl_properties=self.properties)
if not self.properties_manager.properties.get(
"option.tensor_parallel_degree"):
raise InputConfiguration(
"Tensor parallel degree not specified. This is required for Neuron compilation"
)

def run_partition(self) -> str:
"""
Expand All @@ -371,6 +379,63 @@ def run_partition(self) -> str:
f"Encountered an error during Transformers-NeuronX compilation: {exc}"
)

def write_properties(self) -> None:
    """
    Updates outputted serving.properties.

    engine=Python & option.entryPoint=djl_python.transformers_neuronx are hard-coded for Neo partitioning.
    This function strips those hard-coded values from the generated properties and
    outputs the customer inputs for these fields instead (when the customer provided any).

    The customer's option.entryPoint is taken from the OPTION_ENTRYPOINT environment
    variable when it is set and non-empty, otherwise from the input serving.properties.
    option.model_id is always written, pointing at the optimized model subdirectory.

    Nothing is returned; the result is the serving.properties file in
    OUTPUT_MODEL_DIRECTORY.
    """
    customer_properties = load_properties(self.INPUT_MODEL_DIRECTORY)
    # Insertion order matters: it is the order the lines are appended to the file.
    passthrough_properties = {
        "engine":
        customer_properties.get('engine'),
        "option.entryPoint":
        os.environ.get("OPTION_ENTRYPOINT")
        or customer_properties.get("option.entryPoint"),
    }

    output_properties = self.properties_manager.properties
    output_passthrough_properties = {}
    for k, v in passthrough_properties.items():
        # Always drop the hard-coded value, even if the customer gave no replacement.
        output_properties.pop(k, None)
        if v:
            logging.info(
                f"User passed {k}={v}. Outputting in serving.properties")
            output_passthrough_properties[k] = v

    # Write out properties without pass-through properties
    self.properties_manager.properties = output_properties
    self.properties_manager.generate_properties_file()

    output_passthrough_properties[
        "option.model_id"] = f"./{NEO_OPTIMIZED_MODEL_DIR}"
    # Write out pass-through properties; append so the generated file content is kept.
    properties_file = os.path.join(self.OUTPUT_MODEL_DIRECTORY,
                                   'serving.properties')
    with open(properties_file, "a") as f:
        f.writelines(f"{k}={v}\n"
                     for k, v in output_passthrough_properties.items())

def copy_input_files_to_output(self):
    """
    Copies inputted files to output so that custom entrypoints or requirements files are preserved.

    Steps:
      1. Create the optimized-model subdirectory inside OUTPUT_MODEL_DIRECTORY
         (raises FileExistsError if it already exists).
      2. Move everything currently in OUTPUT_MODEL_DIRECTORY into that subdirectory.
      3. Copy the contents of INPUT_MODEL_DIRECTORY into OUTPUT_MODEL_DIRECTORY.
      4. Rewrite serving.properties via write_properties().

    TODO: Avoid making redundant copies of model weights.
    """
    # move outputted files to subdirectory
    optimized_model_dir = os.path.abspath(
        os.path.join(self.OUTPUT_MODEL_DIRECTORY, NEO_OPTIMIZED_MODEL_DIR))
    os.mkdir(optimized_model_dir)

    # Snapshot the directory listing BEFORE moving anything. Removing entries
    # from a directory while its scandir iterator is still live gives
    # unspecified results, so some outputs could be skipped.
    with os.scandir(self.OUTPUT_MODEL_DIRECTORY) as it:
        entry_paths = [entry.path for entry in it]

    for entry_path in entry_paths:
        # The subdirectory itself lives in the output dir; don't move it into itself.
        if os.path.abspath(entry_path) != optimized_model_dir:
            shutil.move(entry_path, optimized_model_dir)

    shutil.copytree(self.INPUT_MODEL_DIRECTORY,
                    self.OUTPUT_MODEL_DIRECTORY,
                    dirs_exist_ok=True)
    self.write_properties()

def neo_partition(self):
self.update_neuron_cache_location()
self.initialize_partition_args_namespace()
Expand Down Expand Up @@ -410,6 +475,8 @@ def neo_partition(self):
cache_manager.create_jumpstart_neuron_cache_in_cache_dir(
self.jumpstart_metadata)

self.copy_input_files_to_output()


def main():
logging.basicConfig(stream=sys.stdout,
Expand Down
1 change: 0 additions & 1 deletion serving/docker/partition/sm_neo_quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ def __init__(self):
self.OUTPUT_MODEL_DIRECTORY: Final[str] = env[2]
self.COMPILATION_ERROR_FILE: Final[str] = env[3]
self.HF_CACHE_LOCATION: Final[str] = env[5]
self.TARGET_INSTANCE_TYPE: Final[str] = env[6]

def update_dataset_cache_location(self):
logging.info(
Expand Down

0 comments on commit e22b1d3

Please sign in to comment.