Add profiling tools for model forward passes, to identify which operations are worth turning into custom kernels

This commit is contained in:
cm2435 2024-02-12 22:31:05 +00:00
parent 11aa5df3ad
commit 6db5b126b6
2 changed files with 44 additions and 8 deletions

View file

@ -0,0 +1,27 @@
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from unsloth.kernels.utils import profile_generate_method

# Run everything on the GPU by default.
torch.set_default_device("cuda")

# Load phi-2 and its tokenizer (remote code required for this checkpoint).
model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2", torch_dtype="auto", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)

# Tokenize a small code-completion prompt; no attention mask is needed here.
model_inputs = tokenizer('''def print_prime(n):
"""
Print all primes between 1 and n
"""''', return_tensors="pt", return_attention_mask=False)

# Sampling-based generation, capped at 100 new tokens.
generation_kwargs = {
    "max_new_tokens": 100,
    "do_sample": True,
    **model_inputs,
}

# Profile the generate() call and keep the profiler object for inspection.
profiler = profile_generate_method(model, generation_kwargs)

View file

@ -94,14 +94,23 @@ def fast_dequantize(W, quant_state = None, out = None):
return out.t() if is_transposed else out
pass
def profile_nn_module(model : torch.nn.Module, inputs: tuple, logging: Optional[bool] = True, **kwargs)->torch.profiler.profile:
    """
    Profile a single forward pass of a torch.nn.Module.

    Args:
        model: The module to profile; invoked as ``model(*inputs)``.
        inputs (tuple): Positional arguments forwarded to the module.
        logging (Optional[bool]): If True, print the profiler summary table
            sorted by self CUDA time. Default is True.
        **kwargs: Extra keyword arguments forwarded to ``torch.profiler.profile``.

    Returns:
        torch.profiler.profile: The profiler object with recorded activities.
    """
    # Only request CUDA events when a GPU is actually available; asking for
    # CUDA activity on a CPU-only build can fail depending on torch version.
    activities = [ProfilerActivity.CPU]
    if torch.cuda.is_available():
        activities.append(ProfilerActivity.CUDA)
    with profile(activities = activities, record_shapes = True, **kwargs) as prof:
        with record_function("model_inference"):
            model(*inputs)
    # Bug fix: honor the `logging` flag (it was previously accepted but unused).
    if logging:
        print(prof.key_averages().table(sort_by = "self_cuda_time_total"))
    # Bug fix: the original never returned the profiler despite the annotated
    # return type, so callers always got None.
    return prof
def profile_generate_method(model, generate_args: dict, logging: Optional[bool] = True, **kwargs):
    """
    Profile the generate method of a transformer model.

    Args:
        model: The transformer model with a generate method.
        generate_args (dict): Arguments to pass to the model's generate method.
        logging (Optional[bool]): If True, logs the profiling results. Default is True.
        **kwargs: Extra keyword arguments forwarded to ``torch.profiler.profile``.

    Returns:
        torch.profiler.profile: The profiler object with recorded activities.
    """
    # Only request CUDA events when a GPU is actually available; asking for
    # CUDA activity on a CPU-only build can fail depending on torch version.
    activities = [ProfilerActivity.CPU]
    if torch.cuda.is_available():
        activities.append(ProfilerActivity.CUDA)
    with profile(activities=activities, record_shapes=True, **kwargs) as prof:
        with record_function("generate_inference"):
            model.generate(**generate_args)
    if logging:
        print(prof.key_averages().table(sort_by="cuda_time_total"))
    return prof