merge with JZ

2025-06-29 16:41:11 +00:00 · 2024-10-18 14:59:40 -04:00 · 2024-10-18 14:59:40 -04:00 · ab86699c88
commit ab86699c88
parent 8da3149581 2ea05a1cd6
7 changed files with 226 additions and 131 deletions
--- a/benchmarks/bench_pm.py
+++ b/benchmarks/bench_pm.py
@ -26,14 +26,17 @@ from jax.sharding import PartitionSpec as P
 from jaxpm.kernels import interpolate_power_spectrum
 from jaxpm.painting import cic_paint_dx
 from jaxpm.pm import linear_field, lpt, make_ode_fn
-
+from jax import make_jaxpr

 def run_simulation(mesh_shape,
                   box_size,
                   halo_size,
                   solver_choice,
                   iterations,
-                   pdims=None):
+                   hlo_print,
+                   trace,
+                   pdims=None,
+                   output_path="."):

    @jax.jit
    def simulate(omega_c, sigma8):
@ -60,7 +63,6 @@ def run_simulation(mesh_shape,
            solver = Tsit5()
        elif solver_choice == "lpt":
            lpt_field = cic_paint_dx(dx, halo_size=halo_size)
-            print(f"TYPE of lpt_field: {type(lpt_field)}")
            return lpt_field, {"num_steps": 0}
        else:
            raise ValueError(
@ -92,6 +94,7 @@ def run_simulation(mesh_shape,

    def run():
        # Warm start
+<<<<<<< HEAD
        chrono_fun = Timer()
        RangePush("warmup")
        final_field, stats = chrono_fun.chrono_jit(simulate,
@ -108,6 +111,39 @@ def run_simulation(mesh_shape,
                                                       ndarray_arg=0)
            RangePop()
        return final_field, stats, chrono_fun
+=======
+        if hlo_print:
+          jaxpr = make_jaxpr(simulate)(0.32, 0.8)
+          lowered = jax.jit(simulate).lower(0.32, 0.8)
+          lower_as_text = lowered.as_text()
+          compiled = lowered.compile()
+          compiled_again = jax.jit(simulate).lower(0.32, 0.8).compile()
+          return jaxpr ,  compiled , compiled_again
+        elif trace:
+          jit_output = f"{output_path}/jit_trace"
+          first_run_output = f"{output_path}/first_run_trace"
+          second_run_output = f"{output_path}/second_run_trace"
+          with jax.profiler.trace(jit_output , create_perfetto_trace=True):
+            final_field, stats = simulate(0.32, 0.8)
+            final_field.block_until_ready()
+          with jax.profiler.trace(first_run_output , create_perfetto_trace=True):
+            final_field, stats = simulate(0.32, 0.8)
+            final_field.block_until_ready()
+          with jax.profiler.trace(second_run_output , create_perfetto_trace=True):
+            final_field, stats = simulate(0.32, 0.8)
+            final_field.block_until_ready()
+        else:         
+          chrono_fun = Timer()
+          RangePush("warmup")
+          final_field, stats = chrono_fun.chrono_jit(simulate, 0.32, 0.8 , ndarray_arg = 0)
+          RangePop()
+          sync_global_devices("warmup")
+          for i in range(iterations):
+              RangePush(f"sim iter {i}")
+              final_field, stats = chrono_fun.chrono_fun(simulate, 0.32, 0.8 , ndarray_arg = 0)
+              RangePop()
+          return final_field, stats, chrono_fun
+>>>>>>> glab/ASKabalan/jaxdecomp_proto

    if jax.device_count() > 1:
        devices = mesh_utils.create_device_mesh(pdims)
@ -151,7 +187,7 @@ if __name__ == "__main__":
                        '--halo_size',
                        type=int,
                        help='Halo size',
-                        required=True)
+                        default=None)
    parser.add_argument('-s',
                        '--solver',
                        type=str,
@ -161,12 +197,7 @@ if __name__ == "__main__":
                            "LeapfrogMidpoint", "leapfrogmidpoint", "lfm",
                            "lpt"
                        ],
-                        required=True)
-    parser.add_argument('-i',
-                        '--iterations',
-                        type=int,
-                        help='Number of iterations',
-                        default=10)
+                        default="lpt")
    parser.add_argument('-o',
                        '--output_path',
                        type=str,
@ -181,15 +212,39 @@ if __name__ == "__main__":
                        type=int,
                        help='Number of nodes',
                        default=1)
+<<<<<<< HEAD

+=======
+    parser.add_argument('-i',
+                        '--iterations',
+                        type=int,
+                        help='Number of iterations',
+                        default=10)
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument('-hlo',
+                        '--hlo_print',
+                        action='store_true',
+                        help='Print hlo generated by XLA')
+    group.add_argument('-t',
+                        '--trace',
+                        action='store_true',
+                        help='Profile using tensorboard')
+    
+>>>>>>> glab/ASKabalan/jaxdecomp_proto
    args = parser.parse_args()
    mesh_size = args.mesh_size
    box_size = [args.box_size] * 3
-    halo_size = args.halo_size
+    halo_size = args.mesh_size // 8 if args.halo_size is None else args.halo_size
    solver_choice = args.solver
    iterations = args.iterations
    output_path = args.output_path
    os.makedirs(output_path, exist_ok=True)
+<<<<<<< HEAD
+=======
+    hlo_print = args.hlo_print
+    trace = args.trace
+    nb_gpus = jax.device_count()
+>>>>>>> glab/ASKabalan/jaxdecomp_proto

    print(f"solver choice: {solver_choice}")
    match solver_choice:
@ -213,9 +268,11 @@ if __name__ == "__main__":
    if args.pdims:
        pdims = tuple(map(int, args.pdims.split("x")))
    else:
-        pdims = (1, 1)
+        pdims = (1, jax.device_count())
+    pdm_str = f"{pdims[0]}x{pdims[1]}"

    mesh_shape = [mesh_size] * 3
+<<<<<<< HEAD

    final_field, stats, chrono_fun = run_simulation(mesh_shape, box_size,
                                                    halo_size, solver_choice,
@ -226,6 +283,50 @@ if __name__ == "__main__":
    )

    metadata = {
+=======
+    
+    if trace:
+      trace_folder = f"{output_path}/profiling/jaxpm/{nb_gpus}/{mesh_shape[0]}_{int(box_size[0])}/{pdm_str}/{solver_choice}/halo_{halo_size}"
+      os.makedirs(trace_folder, exist_ok=True)
+      run_simulation(mesh_shape, box_size, halo_size, solver_choice, iterations, hlo_print, trace, pdims, trace_folder)
+      print(f"Profiling done! Check {trace_folder}")
+    elif hlo_print:
+      hlo_folder = f"{output_path}/hlo/jaxpm/{nb_gpus}/{mesh_shape[0]}_{int(box_size[0])}/{pdm_str}/{solver_choice}/halo_{halo_size}"
+      os.makedirs(hlo_folder, exist_ok=True)
+      jaxpr , compiled , compiled2 = run_simulation(mesh_shape, box_size, halo_size, solver_choice, iterations, hlo_print, trace, pdims, hlo_folder)
+      print(f"type of memory analysis {type(compiled.memory_analysis())}")
+      print(f"memory analysis {compiled.memory_analysis()}")
+      print(f"memory analysis again {compiled2.memory_analysis()}")
+      jax.tree.map(lambda x: print(x), compiled.memory_analysis())
+      with open(f'{hlo_folder}/hlo_jaxpm.md', 'w') as f:
+          f.write(f"# JAXPM HLO\n")
+          f.write(f"## Args: {args}\n")
+          f.write(f"## JAXPR is \n")
+          f.write(f'---\n')
+          f.write(f"{jaxpr}\n")
+          f.write(f'---\n')
+          f.write(f"Lowered as text is \n")
+          f.write(f'---\n')
+          # f.write(f"{lower_as_text}\n")
+          f.write(f'---\n')
+          f.write(f"Compiled is \n")
+          f.write(f'---\n')
+          f.write(f"{compiled.as_text()}\n")
+          f.write(f"Cost analysis is \n")
+          f.write(f'---\n')
+          f.write(f"{compiled.cost_analysis()[0]['flops']}\n")
+          f.write(f'---\n')
+          f.write(f"Memory analysis is \n")
+          f.write(f'---\n')
+          f.write(f"{compiled.memory_analysis()}\n")
+          f.write(f'---\n')
+
+      print(f"Saved HLO to {hlo_folder}")
+    else:
+      final_field, stats, chrono_fun = run_simulation(mesh_shape, box_size, halo_size, solver_choice, iterations, hlo_print, trace, pdims, output_path)
+      print(f"shape of final_field {final_field.shape} and sharding spec {final_field.sharding} and local shape {final_field.addressable_data(0).shape}")
+      metadata = {
+>>>>>>> glab/ASKabalan/jaxdecomp_proto
        'rank': rank,
        'function_name': f'JAXPM-{solver_choice}',
        'precision': args.precision,
@ -236,6 +337,7 @@ if __name__ == "__main__":
        'py': str(pdims[1]),
        'backend': 'NCCL',
        'nodes': str(args.nodes)
+<<<<<<< HEAD
    }
    # Print the results to a CSV file
    chrono_fun.print_to_csv(f'{output_path}/jaxpm_benchmark.csv', **metadata)
@ -254,8 +356,27 @@ if __name__ == "__main__":
    if args.save_fields:
        np.save(f'{field_folder}/final_field_0_{rank}.npy',
                final_field.addressable_data(0))
+=======
+      }
+      # Print the results to a CSV file
+      chrono_fun.print_to_csv(f'{output_path}/jaxpm_benchmark.csv', **metadata)

-    print(f"Finished! ")
-    print(f"Stats {stats}")
-    print(f"Saving to {output_path}/jax_pm_benchmark.csv")
-    print(f"Saving field and logs in {field_folder}")
+      # Save the final field
+>>>>>>> glab/ASKabalan/jaxdecomp_proto
+
+      field_folder = f"{output_path}/final_field/jaxpm/{nb_gpus}/{mesh_size}_{int(box_size[0])}/{pdm_str}/{solver_choice}/halo_{halo_size}"
+      os.makedirs(field_folder, exist_ok=True)
+      with open(f'{field_folder}/jaxpm.log', 'w') as f:
+          f.write(f"Args: {args}\n")
+          f.write(f"JIT time: {chrono_fun.jit_time:.4f} ms\n")
+          for i , time in enumerate(chrono_fun.times):
+            f.write(f"Time {i}: {time:.4f} ms\n")
+          f.write(f"Stats: {stats}\n")
+      if args.save_fields:
+          np.save(f'{field_folder}/final_field_0_{rank}.npy',
+                  final_field.addressable_data(0))
+
+      print(f"Finished! ")
+      print(f"Stats {stats}")
+      print(f"Saving to {output_path}/jax_pm_benchmark.csv")
+      print(f"Saving field and logs in {field_folder}")