small fixes
This commit is contained in:
parent
d0a8439be7
commit
a485db9465
3 changed files with 16 additions and 15 deletions
0
analysis/slices.py
Normal file → Executable file
0
analysis/slices.py
Normal file → Executable file
0
scripts/__init__.py
Normal file
0
scripts/__init__.py
Normal file
|
@ -152,9 +152,15 @@ def check_previous_jobs(args,job_ids_array,box,resubmit_count,error_count):
|
||||||
if job_id is None:
|
if job_id is None:
|
||||||
print(f"Error submitting job for box {box}")
|
print(f"Error submitting job for box {box}")
|
||||||
error_count+=1
|
error_count+=1
|
||||||
|
# Check if the error count exceeds the maximum
|
||||||
|
if error_count >= MAX_ERRORS:
|
||||||
|
raise RuntimeError(f"Error count exceeded {MAX_ERRORS}. Stopping job submission.")
|
||||||
else:
|
else:
|
||||||
job_ids_array[prev_box-1] = int(job_id)
|
job_ids_array[prev_box-1] = int(job_id)
|
||||||
resubmit_count += 1
|
resubmit_count += 1
|
||||||
|
# Check if the resubmit count exceeds the maximum
|
||||||
|
if resubmit_count >= MAX_RESUBMIT:
|
||||||
|
raise RuntimeError(f"Resubmit count exceeded {MAX_RESUBMIT}. Stopping job submission.")
|
||||||
job_status_categories[status].append(prev_box) # Classify as failed
|
job_status_categories[status].append(prev_box) # Classify as failed
|
||||||
# Sleep for a while before resubmitting the next job
|
# Sleep for a while before resubmitting the next job
|
||||||
time.sleep(args.sleep)
|
time.sleep(args.sleep)
|
||||||
|
@ -199,11 +205,11 @@ def print_summary_job_status(job_status_categories, box, resubmit_count, error_c
|
||||||
# Print summary of job statuses
|
# Print summary of job statuses
|
||||||
print(f"Job statuses after box {box}:")
|
print(f"Job statuses after box {box}:")
|
||||||
# Print a table with columns for each status and below the % of jobs in that status
|
# Print a table with columns for each status and below the % of jobs in that status
|
||||||
row0 = f"{'Status':<10}"
|
row0 = f"{'Status':<14}"
|
||||||
for status in job_status_categories.keys():
|
for status in job_status_categories.keys():
|
||||||
row0 += f"{status:>10}"
|
row0 += f"{status:>9} "
|
||||||
print(row0)
|
print(row0)
|
||||||
row1 = f"{'Percentage':<10}"
|
row1 = f"{'Percentage':<14}"
|
||||||
for status in job_status_categories.keys():
|
for status in job_status_categories.keys():
|
||||||
row1 += f"{len(job_status_categories[status])/box*100:>9.1f}%"
|
row1 += f"{len(job_status_categories[status])/box*100:>9.1f}%"
|
||||||
print(row1)
|
print(row1)
|
||||||
|
@ -220,13 +226,13 @@ if __name__ == "__main__":
|
||||||
parser.add_argument("-sd", "--slurmdir", type=str, default=None, help="Directory where the slurm scripts are saved (default is -d/slurm_scripts).")
|
parser.add_argument("-sd", "--slurmdir", type=str, default=None, help="Directory where the slurm scripts are saved (default is -d/slurm_scripts).")
|
||||||
parser.add_argument("-wd", "--workdir", type=str, default=None, help="Directory where the tiles are saved (default is -d/work).")
|
parser.add_argument("-wd", "--workdir", type=str, default=None, help="Directory where the tiles are saved (default is -d/work).")
|
||||||
|
|
||||||
parser.add_argument("-sf","--slurmfile", type=str, default="scola_sCOLA.sh", help="Slurm script file (located in slurmdir).")
|
parser.add_argument("-sf","--slurmfile", type=str, default="scola_sCOLA.sh", help="Slurm script file (located in slurmdir, default is scola_sCOLA.sh).")
|
||||||
parser.add_argument("-tf","--tilefile", type=str, default="scola_tile", help="Tile file name (located in workdir).")
|
parser.add_argument("-tf","--tilefile", type=str, default="scola_tile", help="Tile file name (located in workdir, default is scola_tile).")
|
||||||
parser.add_argument("--jobname", type=str, default="sCOLA_", help="Job name for the slurm jobs.")
|
parser.add_argument("--jobname", type=str, default="sCOLA_", help="Job name for the slurm jobs (default is sCOLA_).")
|
||||||
|
|
||||||
parser.add_argument("-Nt","--N_tiles", type=int, default=4, help="Number of tiles per dimension.")
|
parser.add_argument("-Nt","--N_tiles", type=int, default=4, help="Number of tiles per dimension.")
|
||||||
|
|
||||||
parser.add_argument("--sleep", type=float, default=1.0, help="Sleep time between each job submission (in s).")
|
parser.add_argument("--sleep", type=float, default=1.5, help="Sleep time between each job submission (in s).")
|
||||||
|
|
||||||
args=parser.parse_args()
|
args=parser.parse_args()
|
||||||
|
|
||||||
|
@ -268,7 +274,7 @@ if __name__ == "__main__":
|
||||||
print(f"Max jobs at once: {MAX_JOBS_AT_ONCE} jobs")
|
print(f"Max jobs at once: {MAX_JOBS_AT_ONCE} jobs")
|
||||||
print(f"Check every: {CHECK_EVERY} jobs")
|
print(f"Check every: {CHECK_EVERY} jobs")
|
||||||
print("---------------------------------------------------")
|
print("---------------------------------------------------")
|
||||||
print(f"ETA: {convert_seconds_to_time(args.N_tiles**3*args.sleep*1.05)}")
|
print(f"ETA: {convert_seconds_to_time(args.N_tiles**3*args.sleep*1.2)}")
|
||||||
print("Starting job submission...")
|
print("Starting job submission...")
|
||||||
|
|
||||||
|
|
||||||
|
@ -329,12 +335,6 @@ if __name__ == "__main__":
|
||||||
# Now wait for all jobs to finish
|
# Now wait for all jobs to finish
|
||||||
while len(job_status_categories['CP'])<args.N_tiles**3:
|
while len(job_status_categories['CP'])<args.N_tiles**3:
|
||||||
time.sleep(10*args.sleep)
|
time.sleep(10*args.sleep)
|
||||||
# Check if the error count exceeds the maximum
|
|
||||||
if error_count >= MAX_ERRORS:
|
|
||||||
raise RuntimeError(f"Error count exceeded {MAX_ERRORS}. Stopping job submission.")
|
|
||||||
# Check if the resubmit count exceeds the maximum
|
|
||||||
if resubmit_count >= MAX_RESUBMIT:
|
|
||||||
raise RuntimeError(f"Resubmit count exceeded {MAX_RESUBMIT}. Stopping job submission.")
|
|
||||||
job_status_categories, resubmit_count, error_count = check_previous_jobs(args,job_ids_array,args.N_tiles**3+1,resubmit_count,error_count)
|
job_status_categories, resubmit_count, error_count = check_previous_jobs(args,job_ids_array,args.N_tiles**3+1,resubmit_count,error_count)
|
||||||
print_summary_job_status(job_status_categories, args.N_tiles**3+1, resubmit_count, error_count)
|
print_summary_job_status(job_status_categories, args.N_tiles**3+1, resubmit_count, error_count)
|
||||||
job_status_categories = cap_number_of_jobs(job_status_categories,job_ids_array,MAX_JOBS_AT_ONCE,args.sleep)
|
job_status_categories = cap_number_of_jobs(job_status_categories,job_ids_array,MAX_JOBS_AT_ONCE,args.sleep)
|
||||||
|
@ -343,5 +343,6 @@ if __name__ == "__main__":
|
||||||
print("All jobs finished.")
|
print("All jobs finished.")
|
||||||
# Remove the slurm scripts
|
# Remove the slurm scripts
|
||||||
for box in range(1,args.N_tiles**3+1):
|
for box in range(1,args.N_tiles**3+1):
|
||||||
|
if os.path.exists(args.slurmdir+args.slurmfile+"."+str(box)):
|
||||||
os.remove(args.slurmdir+args.slurmfile+"."+str(box))
|
os.remove(args.slurmdir+args.slurmfile+"."+str(box))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue