# Load the AWQ-quantized Qwen2 instruct checkpoint with Hugging Face
# transformers and generate one reply to a chat-formatted prompt.
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda"  # device the tokenized inputs are moved to

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2-7B-Instruct-AWQ",
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-7B-Instruct-AWQ")

prompt = "Give me a short introduction to large language model."
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt},
]

# Render the conversation as a single prompt string (tokenize=False) that
# ends with the generation prompt, then tokenize it as a batch of one.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
model_inputs = tokenizer([text], return_tensors="pt").to(device)

# Forward every tensor the tokenizer produced (input ids together with the
# attention mask) instead of the input ids alone.
generated_ids = model.generate(**model_inputs, max_new_tokens=512)

# Drop the first len(input_ids) tokens of each output sequence so that only
# the tokens following the prompt are decoded.
generated_ids = [
    output_ids[len(input_ids):]
    for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
# POST a chat-completion request (system + user message) as JSON to the
# /v1/chat/completions endpoint on localhost:8000.
# The payload must be strict JSON: no trailing comma after the last member.
curl http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "Qwen/Qwen2-7B-Instruct-AWQ",
    "messages": [
      {"role": "system", "content": "You are a helpful assistant."},
      {"role": "user", "content": "Tell me something about large language models."}
    ]
  }'
# Send a chat-completion request through the OpenAI Python client, pointed at
# a server on localhost:8000, and print the returned response object.
from openai import OpenAI

# NOTE(review): "EMPTY" looks like a placeholder key for a local server that
# does not check credentials - confirm against the server configuration.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"

client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)

chat_response = client.chat.completions.create(
    model="Qwen/Qwen2-7B-Instruct-AWQ",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Tell me something about large language models."},
    ],
)
print("Chat response:", chat_response)
# Set up AutoAWQ quantization: define the paths and quantization settings,
# then load the tokenizer and the model to be quantized.
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model_path = "your_model_path"  # placeholder: replace with the source model location
quant_path = "your_quantized_model_path"  # placeholder: not used in this snippet
# NOTE(review): presumably 4-bit weights in groups of 128 with zero points,
# using the "GEMM" kernel version - confirm against the AutoAWQ documentation.
quant_config = {"zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM"}

# Load your tokenizer and model with AutoAWQ
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoAWQForCausalLM.from_pretrained(model_path, device_map="auto", safetensors=True)
[
    {
        "role": "system",
        "content": "You are a helpful assistant."
    },
    {
        "role": "user",
        "content": "Tell me who you are."
    },
    {
        "role": "assistant",
        "content": "I am a large language model named Qwen..."
    }
]