diff --git a/analysis/slices.py b/analysis/slices.py old mode 100644 new mode 100755 diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/scola_submit.py b/scripts/scola_submit.py index e54714e..637b122 100644 --- a/scripts/scola_submit.py +++ b/scripts/scola_submit.py @@ -152,9 +152,15 @@ def check_previous_jobs(args,job_ids_array,box,resubmit_count,error_count): if job_id is None: print(f"Error submitting job for box {box}") error_count+=1 + # Check if the error count exceeds the maximum + if error_count >= MAX_ERRORS: + raise RuntimeError(f"Error count exceeded {MAX_ERRORS}. Stopping job submission.") else: job_ids_array[prev_box-1] = int(job_id) resubmit_count += 1 + # Check if the resubmit count exceeds the maximum + if resubmit_count >= MAX_RESUBMIT: + raise RuntimeError(f"Resubmit count exceeded {MAX_RESUBMIT}. Stopping job submission.") job_status_categories[status].append(prev_box) # Classify as failed # Sleep for a while before resubmitting the next job time.sleep(args.sleep) @@ -199,11 +205,11 @@ def print_summary_job_status(job_status_categories, box, resubmit_count, error_c # Print summary of job statuses print(f"Job statuses after box {box}:") # Print a table with columns for each status and below the % of jobs in that status - row0 = f"{'Status':<10}" + row0 = f"{'Status':<14}" for status in job_status_categories.keys(): - row0 += f"{status:>10}" + row0 += f"{status:>9} " print(row0) - row1 = f"{'Percentage':<10}" + row1 = f"{'Percentage':<14}" for status in job_status_categories.keys(): row1 += f"{len(job_status_categories[status])/box*100:>9.1f}%" print(row1) @@ -220,13 +226,13 @@ if __name__ == "__main__": parser.add_argument("-sd", "--slurmdir", type=str, default=None, help="Directory where the slurm scripts are saved (default is -d/slurm_scripts).") parser.add_argument("-wd", "--workdir", type=str, default=None, help="Directory where the tiles are saved (default is -d/work).") - parser.add_argument("-sf","--slurmfile", type=str, default="scola_sCOLA.sh", help="Slurm script file (located in slurmdir).") - parser.add_argument("-tf","--tilefile", type=str, default="scola_tile", help="Tile file name (located in workdir).") - parser.add_argument("--jobname", type=str, default="sCOLA_", help="Job name for the slurm jobs.") + parser.add_argument("-sf","--slurmfile", type=str, default="scola_sCOLA.sh", help="Slurm script file (located in slurmdir, default is scola_sCOLA.sh).") + parser.add_argument("-tf","--tilefile", type=str, default="scola_tile", help="Tile file name (located in workdir, default is scola_tile).") + parser.add_argument("--jobname", type=str, default="sCOLA_", help="Job name for the slurm jobs (default is sCOLA_).") parser.add_argument("-Nt","--N_tiles", type=int, default=4, help="Number of tiles per dimension.") - parser.add_argument("--sleep", type=float, default=1.0, help="Sleep time between each job submission (in s).") + parser.add_argument("--sleep", type=float, default=1.5, help="Sleep time between each job submission (in s).") args=parser.parse_args() @@ -268,7 +274,7 @@ if __name__ == "__main__": print(f"Max jobs at once: {MAX_JOBS_AT_ONCE} jobs") print(f"Check every: {CHECK_EVERY} jobs") print("---------------------------------------------------") - print(f"ETA: {convert_seconds_to_time(args.N_tiles**3*args.sleep*1.05)}") + print(f"ETA: {convert_seconds_to_time(args.N_tiles**3*args.sleep*1.2)}") print("Starting job submission...") @@ -329,12 +335,6 @@ if __name__ == "__main__": # Now wait for all jobs to finish while len(job_status_categories['CP'])= MAX_ERRORS: - raise RuntimeError(f"Error count exceeded {MAX_ERRORS}. Stopping job submission.") - # Check if the resubmit count exceeds the maximum - if resubmit_count >= MAX_RESUBMIT: - raise RuntimeError(f"Resubmit count exceeded {MAX_RESUBMIT}. Stopping job submission.") job_status_categories, resubmit_count, error_count = check_previous_jobs(args,job_ids_array,args.N_tiles**3+1,resubmit_count,error_count) print_summary_job_status(job_status_categories, args.N_tiles**3+1, resubmit_count, error_count) job_status_categories = cap_number_of_jobs(job_status_categories,job_ids_array,MAX_JOBS_AT_ONCE,args.sleep) @@ -343,5 +343,6 @@ if __name__ == "__main__": print("All jobs finished.") # Remove the slurm scripts for box in range(1,args.N_tiles**3+1): - os.remove(args.slurmdir+args.slurmfile+"."+str(box)) + if os.path.exists(args.slurmdir+args.slurmfile+"."+str(box)): + os.remove(args.slurmdir+args.slurmfile+"."+str(box)) \ No newline at end of file