mirror of
https://github.com/unslothai/unsloth
synced 2026-04-21 13:37:39 +00:00
adding tools to profile model forward passes, to identify which operations are worth turning into fused kernels
This commit is contained in:
parent
11aa5df3ad
commit
6db5b126b6
2 changed files with 44 additions and 8 deletions
27
tests/profiles/profile_phi2.py
Normal file
27
tests/profiles/profile_phi2.py
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
# Profiling harness for microsoft/phi-2: runs a short generate() call under
# the PyTorch profiler (via unsloth's profile_generate_method) to surface
# which forward-pass ops are candidates for custom kernels.
import torch

from transformers import AutoModelForCausalLM, AutoTokenizer

from unsloth.kernels.utils import profile_generate_method

# Place all newly created tensors/modules on the GPU by default.
torch.set_default_device("cuda")

# trust_remote_code is required because phi-2 ships custom modeling code.
model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2", torch_dtype="auto", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)

# NOTE(review): the prompt's leading indentation may have been lost when this
# file was extracted — confirm against the original upstream example.
inputs = tokenizer('''def print_prime(n):
"""
Print all primes between 1 and n
"""''', return_tensors="pt", return_attention_mask=False)

generate_args = {
    **inputs,  # tokenizer output is a dict of tensors (input_ids, ...)
    "max_new_tokens": 100,
    "do_sample": True
}

# Profile the whole generate() call; the helper prints a per-op timing table.
prof = profile_generate_method(model, generate_args)
|
@ -94,14 +94,23 @@ def fast_dequantize(W, quant_state = None, out = None):
|
|||
return out.t() if is_transposed else out
|
||||
pass
|
||||
|
||||
def profile_nn_module(model : torch.nn.Module, inputs: tuple, logging: Optional[bool] = True, **kwargs)->torch.profiler.profile:
    """
    Profile a single forward pass of a module with the PyTorch profiler.

    Args:
        model (torch.nn.Module): The module whose forward pass is profiled.
        inputs (tuple): Positional arguments forwarded as ``model(*inputs)``.
        logging (Optional[bool]): If True, print the recorded activities
            sorted by self CUDA time. Default is True.
        **kwargs: Extra keyword arguments passed through to
            ``torch.profiler.profile``.

    Returns:
        torch.profiler.profile: The profiler object with recorded activities.
    """
    with profile(activities = [ProfilerActivity.CPU, ProfilerActivity.CUDA],
                 record_shapes = True, **kwargs) as prof:
        with record_function("model_inference"):
            model(*inputs)

    # Fix: the original never logged nor returned `prof`, despite the
    # `logging` parameter and the declared return type — now consistent
    # with profile_generate_method below.
    if logging:
        print(prof.key_averages().table(sort_by = "self_cuda_time_total"))

    return prof
pass
|
||||
def profile_generate_method(model, generate_args: dict, logging: Optional[bool] = True, **kwargs)->torch.profiler.profile:
    """
    Profile the generate method of a transformer model.

    Args:
        model: The transformer model with a generate method.
        generate_args (dict): Arguments to pass to the model's generate method.
        logging (Optional[bool]): If True, logs the profiling results. Default is True.
        **kwargs: Extra keyword arguments passed through to
            ``torch.profiler.profile``.

    Returns:
        torch.profiler.profile: The profiler object with recorded activities.
    """
    # Fix: the original docstring had stray logging code pasted into its
    # middle; the executable logic below is unchanged.
    with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True, **kwargs) as prof:
        with record_function("generate_inference"):
            model.generate(**generate_args)

    if logging:
        print(prof.key_averages().table(sort_by="cuda_time_total")) # Adjust sort_by if needed

    return prof
|
||||
Loading…
Reference in a new issue