Add tools to profile model forward passes and identify which operations to turn into kernels

(cherry picked from commit 6db5b126b6)
This commit is contained in:
cm2435 2024-02-12 22:31:05 +00:00 committed by Daniel Han
parent 3ab282fd40
commit 12898b5bef

View file

@ -0,0 +1,27 @@
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from unsloth.kernels.utils import profile_generate_method
# Profiling driver: load microsoft/phi-2, tokenize a short code prompt, and
# hand the model plus its generation arguments to profile_generate_method.
MODEL_ID = "microsoft/phi-2"

# Set "cuda" as torch's default device before the model and inputs are built.
torch.set_default_device("cuda")

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Prompt for the model to continue: a function signature and its docstring.
prompt = '''def print_prime(n):
"""
Print all primes between 1 and n
"""'''
inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)

# Generation settings layered on top of the tokenizer output.
generation_settings = {
    "max_new_tokens": 100,
    "do_sample": True,
}
# Tokenizer fields come first, followed by the generation settings.
generate_args = {**inputs, **generation_settings}

# NOTE(review): presumably profile_generate_method runs generation on `model`
# with these arguments under a profiler and returns the profiler result --
# confirm against unsloth.kernels.utils.
prof = profile_generate_method(model, generate_args)