# Launch the inference server with its built-in defaults.
#   --network host  : container shares the host network stack, so the server's
#                     port is reachable directly on the host
#   --gpus all      : expose every host GPU to the container (requires the
#                     NVIDIA Container Toolkit)
#   --shm-size 16gb : raise the container's shared-memory (/dev/shm) limit to
#                     16 GB — presumably needed for tensor IPC; confirm
#   -v …:/data:ro   : mount the model directory read-only where the image
#                     expects to find the weights
docker run \
  --network host \
  --gpus all \
  --shm-size 16gb \
  -v /path/to/model/:/data:ro \
  llm_inference_server_release:0

# Example: override the image's default parameters via environment variables
# docker run --network host --gpus all --shm-size 16gb \
#            -e MODEL_TP=4 \
#            -e MODEL_PORT=30002 \
#            -e MODEL_NAME=model \
#            -e MODEL_MEM_FRACTION=0.85 \
#            -v /path/to/model/:/data:ro \
#            llm_inference_server_release:0
