Adapted to the Ascend NPU #3933

Closed
wants to merge 1 commit into from
3 changes: 3 additions & 0 deletions server/knowledge_base/kb_cache/base.py
@@ -150,6 +150,9 @@ def load_embeddings(self, model: str = None, device: str = None) -> Embeddings:
             embeddings.query_instruction = ""
         else:
             from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+            if device == "npu":
+                import torch_npu
+                torch_npu.npu.set_device("npu:0")
             embeddings = HuggingFaceEmbeddings(model_name=get_model_path(model),
                                                model_kwargs={'device': device})
         item.obj = embeddings
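For context, a minimal standalone sketch of what the added branch does when loading embeddings on Ascend. The model name below is illustrative (the patched code resolves it via get_model_path(model)), and it assumes torch_npu and the Ascend CANN toolkit are installed:

# Sketch only: pin the process to the first Ascend NPU, then load embeddings.
import torch_npu  # registers the "npu" device type with PyTorch
torch_npu.npu.set_device("npu:0")  # same call the patch adds

from langchain.embeddings.huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-large-zh",    # illustrative model path
    model_kwargs={"device": "npu"},    # passed through to sentence-transformers/torch
)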
2 changes: 1 addition & 1 deletion server/llm_api_stale.py
@@ -62,7 +62,7 @@
 parser.add_argument(
     "--device",
     type=str,
-    choices=["cpu", "cuda", "mps", "xpu"],
+    choices=["cpu", "cuda", "mps", "xpu", "npu"],
     default="cuda",
     help="The device type",
 )
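With "npu" added to the allowed choices, the launcher can target Ascend directly. An illustrative invocation, with the other flags left at their defaults:

python server/llm_api_stale.py --device npu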
11 changes: 7 additions & 4 deletions server/utils.py
@@ -512,25 +512,28 @@ def _get_proxies():
 def detect_device() -> Literal["cuda", "mps", "cpu"]:
     try:
         import torch
+        import mindspore as ms

Review comment: The import on line 515 would be better placed just before line 520 — users on cuda or mps would then return directly without ever importing mindspore.

Review comment: Why use mindspore here instead of torch_npu directly? (See the sketch after this diff.)

         if torch.cuda.is_available():
             return "cuda"
         if torch.backends.mps.is_available():
             return "mps"
+        if ms.get_context(attr_key='device_target') == 'Ascend':
+            return "npu"
     except:
         pass
     return "cpu"


-def llm_device(device: str = None) -> Literal["cuda", "mps", "cpu"]:
+def llm_device(device: str = None) -> Literal["cuda", "mps", "cpu", "npu"]:
     device = device or LLM_DEVICE
-    if device not in ["cuda", "mps", "cpu"]:
+    if device not in ["cuda", "mps", "cpu", "npu"]:
         device = detect_device()
     return device


-def embedding_device(device: str = None) -> Literal["cuda", "mps", "cpu"]:
+def embedding_device(device: str = None) -> Literal["cuda", "mps", "cpu", "npu"]:
     device = device or EMBEDDING_DEVICE
-    if device not in ["cuda", "mps", "cpu"]:
+    if device not in ["cuda", "mps", "cpu", "npu"]:
         device = detect_device()
     return device

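Picking up both review comments above, a minimal sketch of how detect_device could probe the Ascend backend through torch_npu instead of mindspore, with the extra import deferred until the common backends have missed. It assumes torch_npu exposes npu.is_available() analogous to torch.cuda.is_available(); this is a sketch of the suggestion, not the code merged here:

from typing import Literal


def detect_device() -> Literal["cuda", "mps", "npu", "cpu"]:
    try:
        import torch
        if torch.cuda.is_available():
            return "cuda"
        if torch.backends.mps.is_available():
            return "mps"
        # Deferred import: cuda/mps users return above without ever
        # loading the Ascend stack (first review comment).
        import torch_npu
        if torch_npu.npu.is_available():
            return "npu"
    except Exception:
        pass
    return "cpu"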