mirror of
https://github.com/unslothai/unsloth
synced 2026-04-21 13:37:39 +00:00
adding tools to profile model forward passes, to identify which operations are worth turning into fused kernels
This commit is contained in:
parent
11aa5df3ad
commit
6db5b126b6
2 changed files with 44 additions and 8 deletions
27
tests/profiles/profile_phi2.py
Normal file
27
tests/profiles/profile_phi2.py
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
# Profiling harness for microsoft/phi-2: runs a short generate() call under
# the PyTorch profiler (via unsloth's profile_generate_method) to surface
# which forward-pass ops are candidates for custom kernels.
import torch

from transformers import AutoModelForCausalLM, AutoTokenizer

from unsloth.kernels.utils import profile_generate_method

# Place all newly created tensors/modules on the GPU by default.
torch.set_default_device("cuda")

# trust_remote_code is required because phi-2 ships custom modeling code.
model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2", torch_dtype="auto", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)

# NOTE(review): the prompt's leading indentation may have been lost when this
# file was extracted — confirm against the original upstream example.
inputs = tokenizer('''def print_prime(n):
"""
Print all primes between 1 and n
"""''', return_tensors="pt", return_attention_mask=False)

generate_args = {
    **inputs,  # tokenizer output is a dict of tensors (input_ids, ...)
    "max_new_tokens": 100,
    "do_sample": True
}

# Profile the whole generate() call; the helper prints a per-op timing table.
prof = profile_generate_method(model, generate_args)
|
@ -94,14 +94,23 @@ def fast_dequantize(W, quant_state = None, out = None):
|
|||
return out.t() if is_transposed else out
|
||||
pass
|
||||
|
||||
def profile_nn_module(model : torch.nn.Module, inputs: tuple, logging: Optional[bool] = True, **kwargs)->torch.profiler.profile:
    """
    Profile a single forward pass of a module with the PyTorch profiler.

    Args:
        model (torch.nn.Module): The module whose forward pass is profiled.
        inputs (tuple): Positional arguments forwarded as ``model(*inputs)``.
        logging (Optional[bool]): If True, print the recorded activities
            sorted by self CUDA time. Default is True.
        **kwargs: Extra keyword arguments passed through to
            ``torch.profiler.profile``.

    Returns:
        torch.profiler.profile: The profiler object with recorded activities.
    """
    with profile(activities = [ProfilerActivity.CPU, ProfilerActivity.CUDA],
                 record_shapes = True, **kwargs) as prof:
        with record_function("model_inference"):
            model(*inputs)

    # Fix: the original never logged nor returned `prof`, despite the
    # `logging` parameter and the declared return type — now consistent
    # with profile_generate_method below.
    if logging:
        print(prof.key_averages().table(sort_by = "self_cuda_time_total"))

    return prof
pass
|
||||
def profile_generate_method(model, generate_args: dict, logging: Optional[bool] = True, **kwargs)->torch.profiler.profile:
    """
    Profile the generate method of a transformer model.

    Args:
        model: The transformer model with a generate method.
        generate_args (dict): Arguments to pass to the model's generate method.
        logging (Optional[bool]): If True, logs the profiling results. Default is True.
        **kwargs: Extra keyword arguments passed through to
            ``torch.profiler.profile``.

    Returns:
        torch.profiler.profile: The profiler object with recorded activities.
    """
    # Fix: the original docstring had stray logging code pasted into its
    # middle; the executable logic below is unchanged.
    with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True, **kwargs) as prof:
        with record_function("generate_inference"):
            model.generate(**generate_args)

    if logging:
        print(prof.key_averages().table(sort_by="cuda_time_total")) # Adjust sort_by if needed

    return prof
|
||||
Loading…
Reference in a new issue