From 75515949dc2f3b29260f69a921568e9a7cea8c9f Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 13 Dec 2011 17:03:14 -0500 Subject: [PATCH 1/8] Incomplete implementation of Eskow algorithm --- src/eskow.cpp | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 src/eskow.cpp diff --git a/src/eskow.cpp b/src/eskow.cpp new file mode 100644 index 0000000..a4a8884 --- /dev/null +++ b/src/eskow.cpp @@ -0,0 +1,109 @@ + +namespace CholeskyEskow +{ + + template + void minmax_diag(A&m m, int j, int N, T& minval, T& maxval, int& i_min, int& i_max) + { + minval = maxval = m(j,j); + + for (int k = j+1; k < N; k++) + { + maxval = std::max(maxval, m(k,k)); + minval = std::min(minval, m(k,k)); + } + + for (int k = j; k < N; k++) + { + if (m(k,k) == minval) + i_min = k; + if (m(k,k) == maxval) + i_max = k; + } + } + + template + void swap_rows(A& m, int N, int i0, int i1) + { + for (int r = 0; r < N; r++) + std::swap(m(r,i0), m(r,i1)); + } + + template + void swap_cols(A& m, int N, int i0, int i1) + { + for (int c = 0; c < N; c++) + std::swap(m(i0,c), m(i1,c)); + } + + template + T square(T x) + { + return x*x; + } + + template + T max_row(A& m, int i, int j, int N) + { + T v = m(i,i) - square(m(i,j))/m(j,j); + } + + + template + void cholesky_eskow(A& m, int N) + { + T tau_bar = std::pow(mach_epsilon(), 2./3); + T tau = std::pow(mach_epsilon(), 1./3); + T mu = 0.1; + bool phaseone = true; + T gamma = max_diag(m, 0, N); + int j; + + for (j = 0; j < N && phaseone; j++) + { + T minval, maxval; + + minmax_diag(m, j, N, minval, maxval, i_min, i_max); + if (maxval < tau_bar*gamma || minval < -mu*maxval) + { + phaseone = false; + break; + } + + if (i_max != j) + { + swap_cols(m, N, i_max, j); + swap_rows(m, N, i_max, j); + } + + if (max_row(m, j, N) < -mu*gamma) + { + phaseone = false; + break; + } + + T L_jj = std::sqrt(m(j,j)); + + m(j,j) = L_jj; + for (int i = j+1; i < N; i++) + { + m(i,j) /= L_jj; + for (int k = j+1; k < i; k++) 
+ m(i,k) -= m(i,j)*m(k,j); + } + } + + if (!phaseone && j == N-1) + { + T A_nn = m(N-1,N-1); + T delta = -A_nn + std::max(tau*(-A_nn)/(1-tau), tau_bar*gamma); + + m(N-1,N-1) = std::sqrt(m(N-1,N-1) + delta); + } + if (!phaseone && j < (N-1)) + { + + } + } + +}; From 8dd58836e0813fc3ca3c8d8df48ffbae64372cd4 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 13 Dec 2011 17:23:50 -0500 Subject: [PATCH 2/8] Finished draft implementation. --- src/{eskow.cpp => eskow.hpp} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/{eskow.cpp => eskow.hpp} (100%) diff --git a/src/eskow.cpp b/src/eskow.hpp similarity index 100% rename from src/eskow.cpp rename to src/eskow.hpp From 8d4419d2fd6be46ce0f1de487867231f02306849 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 13 Dec 2011 17:48:37 -0500 Subject: [PATCH 3/8] Got a compiling version of the algorithm --- sample/CMakeLists.txt | 4 ++ sample/testEskow.cpp | 50 ++++++++++++++ src/eskow.hpp | 150 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 194 insertions(+), 10 deletions(-) create mode 100644 sample/testEskow.cpp diff --git a/sample/CMakeLists.txt b/sample/CMakeLists.txt index c2c835a..82eabcd 100644 --- a/sample/CMakeLists.txt +++ b/sample/CMakeLists.txt @@ -26,3 +26,7 @@ if (HDF5_FOUND) add_executable(testReadFlash testReadFlash.cpp) target_link_libraries(testReadFlash ${tolink}) endif (HDF5_FOUND) + + +add_executable(testEskow testEskow.cpp) +target_link_libraries(testEskow ${tolink}) \ No newline at end of file diff --git a/sample/testEskow.cpp b/sample/testEskow.cpp new file mode 100644 index 0000000..05a92d5 --- /dev/null +++ b/sample/testEskow.cpp @@ -0,0 +1,50 @@ +#include +#include +#include +#include +#include "eskow.hpp" + +using namespace std; + +double Hartmann_Matrix[6][6] = { + { 14.8253, -6.4243, 7.8746, -1.2498, 10.2733, 10.2733 }, + { -6.4243, 15.1024, -1.1155, -0.2761, -8.2117, -8.2117 }, + { 7.8746, -1.1155, 51.8519, -23.3482, 12.5902, 12.5902 }, + { -1.2498, -0.2761, -23.3482, 
22.7962, -9.8958, -9.8958 }, + { 10.2733, -8.2117, 12.5902, -9.8958, 21.0656, 21.0656 }, + { 10.2733, -8.2117, 12.5902, -9.8958, 21.0656, 21.0656 } +}; + +struct MatrixOp +{ + vector M; + int N; + + double& operator()(int i, int j) + { + return M[i*N + j]; + } +}; + +int main() +{ + MatrixOp M; + double norm_E; + + M.N = 6; + M.M.resize(M.N*M.N); + + memcpy(&M.M[0], &Hartmann_Matrix[0][0], sizeof(double)*36); + + CholeskyEskow::cholesky_eskow(M, M.N, norm_E); + + for (int i = 0; i < M.N; i++) + { + for (int j = 0; j < M.N; j++) + { + cout << setprecision(25) << M(i,j) << " "; + } + cout << endl; + } + return 0; +} diff --git a/src/eskow.hpp b/src/eskow.hpp index a4a8884..d18d56c 100644 --- a/src/eskow.hpp +++ b/src/eskow.hpp @@ -1,9 +1,29 @@ +#ifndef __ESKOW_CHOLESKY_HPP +#define __ESKOW_CHOLESKY_HPP + +#include +#include +#include "mach.hpp" + +/* Implementation of Schnabel & Eskow, 1999, Vol. 9, No. 4, pp. 1135-148, SIAM J. OPTIM. */ namespace CholeskyEskow { template - void minmax_diag(A&m m, int j, int N, T& minval, T& maxval, int& i_min, int& i_max) + T max_diag(A& m, int j, int N) + { + T maxval = m(j,j); + + for (int k = j+1; k < N; k++) + { + maxval = std::max(maxval, m(k,k)); + } + return maxval; + } + + template + void minmax_diag(A& m, int j, int N, T& minval, T& maxval, int& i_min, int& i_max) { minval = maxval = m(j,j); @@ -43,27 +63,54 @@ namespace CholeskyEskow } template - T max_row(A& m, int i, int j, int N) + T min_row(A& m, int j, int N) { - T v = m(i,i) - square(m(i,j))/m(j,j); + T a = 1/m(j,j); + T v = m(j+1,j+1) - square(m(j+1,j))*a; + + for (int i = j+2; i < N; i++) + { + v = std::max(v, m(i, i) - square(m(i,j))*a); + } + + return v; } + template + int g_max(const std::vector& g, int j, int N) + { + T a = g[j]; + int k = j; + + for (int i = j+1; i < N; i++) + { + if (a < g[i]) + { + a = g[i]; + k = i; + } + } + return k; + } template - void cholesky_eskow(A& m, int N) + void cholesky_eskow(A& m, int N, T& norm_E) { T tau_bar = 
std::pow(mach_epsilon(), 2./3); T tau = std::pow(mach_epsilon(), 1./3); T mu = 0.1; bool phaseone = true; - T gamma = max_diag(m, 0, N); + T gamma = max_diag(m, 0, N); int j; + norm_E = 0; + for (j = 0; j < N && phaseone; j++) { T minval, maxval; + int i_min, i_max; - minmax_diag(m, j, N, minval, maxval, i_min, i_max); + minmax_diag(m, j, N, minval, maxval, i_min, i_max); if (maxval < tau_bar*gamma || minval < -mu*maxval) { phaseone = false; @@ -72,11 +119,11 @@ namespace CholeskyEskow if (i_max != j) { - swap_cols(m, N, i_max, j); - swap_rows(m, N, i_max, j); + swap_cols(m, N, i_max, j); + swap_rows(m, N, i_max, j); } - if (max_row(m, j, N) < -mu*gamma) + if (min_row(m, j, N) < -mu*gamma) { phaseone = false; break; @@ -93,6 +140,7 @@ namespace CholeskyEskow } } + if (!phaseone && j == N-1) { T A_nn = m(N-1,N-1); @@ -100,10 +148,92 @@ namespace CholeskyEskow m(N-1,N-1) = std::sqrt(m(N-1,N-1) + delta); } + + + + if (!phaseone && j < (N-1)) { - + int k = j-1; + std::vector g(N); + + for (int i = k+1; i < N; i++) + { + g[i] = m(i,i); + for (int j = k+1; j < i; j++) + g[i] -= std::abs(m(i,j)); + for (int j = i+1; j < N; j++) + g[i] -= std::abs(m(j,i)); + } + + T delta, delta_prev = 0; + + for (int j = k+1; j < N-2; j++) + { + int i = g_max(g, j, N); + T norm_j; + + if (i != j) + { + swap_cols(m, N, i, j); + swap_rows(m, N, i, j); + } + + for (int i = j+1; j < N; j++) + { + norm_j += std::abs(m(i,j)); + } + + delta = std::max(delta_prev, std::max((T)0, -m(j,j) + std::max(norm_j,tau_bar*gamma))); + if (delta > 0) + { + m(j,j) += delta; + delta_prev = delta; + } + + if (m(j,j) != norm_j) + { + T temp = 1 - norm_j/m(j,j); + + for (int i = j+1; j < N; j++) + { + g[i] += std::abs(m(i,j))*temp; + } + } + + // Now we do the classic cholesky iteration + T L_jj = std::sqrt(m(j,j)); + + m(j,j) = L_jj; + for (int i = j+1; i < N; i++) + { + m(i,j) /= L_jj; + for (int k = j+1; k < i; k++) + m(i,k) -= m(i,j)*m(k,j); + } + } + + // The final 2x2 submatrix is special + T A00 = m(N-2, 
N-2), A01 = m(N-2, N-1), A11 = m(N-1,N-1); + T sq_DELTA = std::sqrt(square(A00-A11) + square(A01)); + T lambda_hi = 0.5*((A00+A11) + sq_DELTA); + T lambda_lo = 0.5*((A00+A11) - sq_DELTA); + + delta = std::max(std::max((T)0, -lambda_lo + std::max(tau*sq_DELTA/(1-tau), tau_bar*gamma)),delta_prev); + if (delta > 0) + { + m(N-1,N-1) += delta; + m(N,N) += delta; + delta_prev = delta; + } + m(N-2,N-2) = A00 = std::sqrt(A00); + m(N-1,N-2) = (A01 /= A00); + m(N-1,N-1) = std::sqrt(A11-A01*A01); + norm_E = delta_prev; } } }; + + +#endif From cf279bbb19a267a99f1bc66cd21de47d0efb79a6 Mon Sep 17 00:00:00 2001 From: Guilhem Lavaux Date: Tue, 13 Dec 2011 22:43:53 -0500 Subject: [PATCH 4/8] Added missing file --- sample/testEskow.cpp | 5 ++++- src/mach.hpp | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 src/mach.hpp diff --git a/sample/testEskow.cpp b/sample/testEskow.cpp index 05a92d5..ff19375 100644 --- a/sample/testEskow.cpp +++ b/sample/testEskow.cpp @@ -42,7 +42,10 @@ int main() { for (int j = 0; j < M.N; j++) { - cout << setprecision(25) << M(i,j) << " "; + if (j > i) + cout << "0 "; + else + cout << setprecision(25) << M(i,j) << " "; } cout << endl; } diff --git a/src/mach.hpp b/src/mach.hpp new file mode 100644 index 0000000..8e86883 --- /dev/null +++ b/src/mach.hpp @@ -0,0 +1,19 @@ +#ifndef __COSMO_MACHINE_TEST_HPP +#define __COSMO_MACHINE_TEST_HPP + +#include + +template +T mach_epsilon() +{ + T eps = (T)1; + do + { + eps /= 2; + } + while (( (T)1 + (eps/2)) != (T)1); + std::cout << " epsilon = " << eps << std::endl; + return eps; +} + +#endif From 196e17c2425f5fee9391267d571fe710789d7204 Mon Sep 17 00:00:00 2001 From: Guilhem Lavaux Date: Wed, 14 Dec 2011 17:40:10 -0500 Subject: [PATCH 5/8] Fixed decomposition. Add sample matrix. 
--- sample/Hartmann_Matrix.txt | 7 +++ sample/testEskow.cpp | 26 ++++++++--- src/eskow.hpp | 94 +++++++++++++++++++++++++------------- 3 files changed, 89 insertions(+), 38 deletions(-) create mode 100644 sample/Hartmann_Matrix.txt diff --git a/sample/Hartmann_Matrix.txt b/sample/Hartmann_Matrix.txt new file mode 100644 index 0000000..18df79d --- /dev/null +++ b/sample/Hartmann_Matrix.txt @@ -0,0 +1,7 @@ +6 + 14.8253 -6.4243 7.8746 -1.2498 10.2733 10.2733 + -6.4243 15.1024 -1.1155 -0.2761 -8.2117 -8.2117 + 7.8746 -1.1155 51.8519 -23.3482 12.5902 12.5902 + -1.2498 -0.2761 -23.3482 22.7962 -9.8958 -9.8958 + 10.2733 -8.2117 12.5902 -9.8958 21.0656 21.0656 + 10.2733 -8.2117 12.5902 -9.8958 21.0656 21.0656 diff --git a/sample/testEskow.cpp b/sample/testEskow.cpp index ff19375..2f173a6 100644 --- a/sample/testEskow.cpp +++ b/sample/testEskow.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -26,28 +27,39 @@ struct MatrixOp } }; -int main() +int main(int argc, char **argv) { MatrixOp M; double norm_E; + ifstream fi(argv[1]); + ofstream f("eskowed.txt"); + CholeskyEskow chol; - M.N = 6; + fi >> M.N; M.M.resize(M.N*M.N); - memcpy(&M.M[0], &Hartmann_Matrix[0][0], sizeof(double)*36); + for (int i = 0; i < M.N; i++) + { + for (int j = 0; j < M.N; j++) + { + fi >> M(i,j); + if (j > i) + M(i,j) =0; + } + } - CholeskyEskow::cholesky_eskow(M, M.N, norm_E); + chol.cholesky(M, M.N, norm_E); for (int i = 0; i < M.N; i++) { for (int j = 0; j < M.N; j++) { if (j > i) - cout << "0 "; + f << "0 "; else - cout << setprecision(25) << M(i,j) << " "; + f << setprecision(25) << M(i,j) << " "; } - cout << endl; + f << endl; } return 0; } diff --git a/src/eskow.hpp b/src/eskow.hpp index d18d56c..ac9b292 100644 --- a/src/eskow.hpp +++ b/src/eskow.hpp @@ -7,24 +7,49 @@ /* Implementation of Schnabel & Eskow, 1999, Vol. 9, No. 4, pp. 1135-148, SIAM J. OPTIM. 
*/ -namespace CholeskyEskow +template +class CholeskyEskow { +private: + static const bool verbose_eskow = true; + T tau, tau_bar, mu; + + void print_matrix(A& m, int N) + { + using std::cout; + using std::endl; + using std::setprecision; + + if (verbose_eskow) + { + + for (int i = 0; i < N; i++) + { + for (int j = 0; j < N; j++) + { + cout.width(6); + cout << setprecision(5) << m(i,j) << " "; + } + cout << endl; + } + cout << endl; + } + } - template T max_diag(A& m, int j, int N) { - T maxval = m(j,j); + T maxval = std::abs(m(j,j)); for (int k = j+1; k < N; k++) { - maxval = std::max(maxval, m(k,k)); + maxval = std::max(maxval, std::abs(m(k,k))); } return maxval; } - template void minmax_diag(A& m, int j, int N, T& minval, T& maxval, int& i_min, int& i_max) { + i_min = i_max = j; minval = maxval = m(j,j); for (int k = j+1; k < N; k++) @@ -35,34 +60,30 @@ namespace CholeskyEskow for (int k = j; k < N; k++) { - if (m(k,k) == minval) + if (m(k,k) == minval && i_min < 0) i_min = k; - if (m(k,k) == maxval) + if (m(k,k) == maxval && i_max < 0) i_max = k; } } - template void swap_rows(A& m, int N, int i0, int i1) { for (int r = 0; r < N; r++) std::swap(m(r,i0), m(r,i1)); } - template void swap_cols(A& m, int N, int i0, int i1) { for (int c = 0; c < N; c++) std::swap(m(i0,c), m(i1,c)); } - template T square(T x) { return x*x; } - template T min_row(A& m, int j, int N) { T a = 1/m(j,j); @@ -70,13 +91,12 @@ namespace CholeskyEskow for (int i = j+2; i < N; i++) { - v = std::max(v, m(i, i) - square(m(i,j))*a); + v = std::min(v, m(i, i) - square(m(i,j))*a); } return v; } - template int g_max(const std::vector& g, int j, int N) { T a = g[j]; @@ -93,37 +113,45 @@ namespace CholeskyEskow return k; } - template - void cholesky_eskow(A& m, int N, T& norm_E) +public: + CholeskyEskow() + { + tau = std::pow(mach_epsilon(), 1./3); + tau_bar = std::pow(mach_epsilon(), 2./3); + mu=0.1; + } + + void cholesky(A& m, int N, T& norm_E) { - T tau_bar = std::pow(mach_epsilon(), 2./3); - T tau 
= std::pow(mach_epsilon(), 1./3); - T mu = 0.1; bool phaseone = true; - T gamma = max_diag(m, 0, N); + T gamma = max_diag(m, 0, N); int j; norm_E = 0; - + for (j = 0; j < N && phaseone; j++) { T minval, maxval; int i_min, i_max; - - minmax_diag(m, j, N, minval, maxval, i_min, i_max); + + print_matrix(m, N); + + minmax_diag(m, j, N, minval, maxval, i_min, i_max); if (maxval < tau_bar*gamma || minval < -mu*maxval) { phaseone = false; break; } - if (i_max != j) + if (i_max != j) { - swap_cols(m, N, i_max, j); - swap_rows(m, N, i_max, j); + std::cout << "Have to swap i=" << i_max << " and j=" << j << std::endl; + swap_cols(m, N, i_max, j); + swap_rows(m, N, i_max, j); + } - if (min_row(m, j, N) < -mu*gamma) + if (min_row(m, j, N) < -mu*gamma) { phaseone = false; break; @@ -135,7 +163,7 @@ namespace CholeskyEskow for (int i = j+1; i < N; i++) { m(i,j) /= L_jj; - for (int k = j+1; k < i; k++) + for (int k = j+1; k <= i; k++) m(i,k) -= m(i,j)*m(k,j); } } @@ -151,9 +179,10 @@ namespace CholeskyEskow - if (!phaseone && j < (N-1)) { + std::cout << "Phase two ! 
(j=" << j << ")" << std::endl; + int k = j-1; std::vector g(N); @@ -173,12 +202,15 @@ namespace CholeskyEskow int i = g_max(g, j, N); T norm_j; + print_matrix(m, N); + if (i != j) { - swap_cols(m, N, i, j); - swap_rows(m, N, i, j); + swap_cols(m, N, i, j); + swap_rows(m, N, i, j); } + for (int i = j+1; j < N; j++) { norm_j += std::abs(m(i,j)); @@ -208,7 +240,7 @@ namespace CholeskyEskow for (int i = j+1; i < N; i++) { m(i,j) /= L_jj; - for (int k = j+1; k < i; k++) + for (int k = j+1; k <= i; k++) m(i,k) -= m(i,j)*m(k,j); } } From dc8ef0be34d397cbed52be03789d6a96aa343aa3 Mon Sep 17 00:00:00 2001 From: Guilhem Lavaux Date: Thu, 15 Dec 2011 12:12:10 -0500 Subject: [PATCH 6/8] Flash fixes --- src/h5_readFlash.cpp | 26 +++++++++++++++++++------- src/h5_readFlash.hpp | 14 +++++++------- src/loadFlash.cpp | 23 +++++++++++++++-------- 3 files changed, 41 insertions(+), 22 deletions(-) diff --git a/src/h5_readFlash.cpp b/src/h5_readFlash.cpp index 0559a8f..0c78f58 100644 --- a/src/h5_readFlash.cpp +++ b/src/h5_readFlash.cpp @@ -179,13 +179,13 @@ void h5_read_flash3_particles (H5File* file, int* totalparticles, int* localnp, int* particle_offset, - float pos1[], - float pos2[], - float pos3[], - float vel1[], - float vel2[], - float vel3[], - int id[]) + float *pos1, + float *pos2, + float *pos3, + float *vel1, + float *vel2, + float *vel3, + int *id) { herr_t status; @@ -339,15 +339,27 @@ void h5_read_flash3_particles (H5File* file, /* convert buffer into particle struct */ + if (id) { for(p=0; p < (pcount); p++) { id[p+poffset] = (int) *(partBuffer+iptag-1+p*numProps); + } } + + if (pos1 && pos2 && pos3) { + for(p=0; p < (pcount); p++) { pos1[p+poffset] = (float) *(partBuffer+ipx-1+p*numProps); pos2[p+poffset] = (float) *(partBuffer+ipy-1+p*numProps); pos3[p+poffset] = (float) *(partBuffer+ipz-1+p*numProps); + } + } + + + if (vel1 && vel2 && vel3) { + for(p=0; p < (pcount); p++) { vel1[p+poffset] = (float) *(partBuffer+ipvx-1+p*numProps); vel2[p+poffset] = (float) 
*(partBuffer+ipvy-1+p*numProps); vel3[p+poffset] = (float) *(partBuffer+ipvz-1+p*numProps); } + } memspace.close(); //status = H5Sclose(memspace); diff --git a/src/h5_readFlash.hpp b/src/h5_readFlash.hpp index 273ca07..5187232 100644 --- a/src/h5_readFlash.hpp +++ b/src/h5_readFlash.hpp @@ -23,13 +23,13 @@ void h5_read_flash3_particles (H5File* file, int* totalparticles, int* localnp, int* particle_offset, - float pos1[], - float pos2[], - float pos3[], - float vel1[], - float vel2[], - float vel3[], - int id[]); + float *pos1, + float *pos2, + float *pos3, + float *vel1, + float *vel2, + float *vel3, + int *id); void h5_read_flash3_header_info(H5File* file, double* time, /* simulation time */ diff --git a/src/loadFlash.cpp b/src/loadFlash.cpp index cc58521..ada498d 100644 --- a/src/loadFlash.cpp +++ b/src/loadFlash.cpp @@ -51,29 +51,36 @@ SimuData *CosmoTool::loadFlashMulti(const char *fname, int id, int loadflags) data->Omega_Lambda = omegalambda; // particle data - for (int i = 0; i < 3; i++) { - data->Pos[i] = new float[data->NumPart]; - if (data->Pos[i] == 0) { - delete data; + if (loadflags& NEED_POSITION) { + for (int i = 0; i < 3; i++) { + data->Pos[i] = new float[data->NumPart]; + if (data->Pos[i] == 0) { + delete data; return 0; } - } + } } + + if (loadflags &NEED_VELOCITY) { for (int i = 0; i < 3; i++) { data->Vel[i] = new float[data->NumPart]; if (data->Vel[i] == 0) { delete data; return 0; } - } + } } + if (loadflags & NEED_GADGET_ID) { data->Id = new int[data->NumPart]; if (data->Id == 0) { delete data; return 0; } + } int offset = 0; + + if (loadflags &(NEED_GADGET_ID|NEED_POSITION|NEED_VELOCITY)) h5_read_flash3_particles(&file, &npart, &npart, &offset, data->Pos[0], data->Pos[1], data->Pos[2], data->Vel[0], data->Vel[1], data->Vel[2], @@ -81,8 +88,8 @@ SimuData *CosmoTool::loadFlashMulti(const char *fname, int id, int loadflags) for (int i = 0; i < 3; i++) { for (int n = 0; n < data->NumPart; n++) { - data->Pos[i][n] = data->Pos[i][n] * data->Hubble 
/ kpc2cm; - data->Vel[i][n] = data->Vel[i][n] * data->Hubble / km2cm; + if (loadflags& NEED_POSITION) data->Pos[i][n] = data->Pos[i][n] * data->Hubble / kpc2cm; + if (loadflags&NEED_VELOCITY) data->Vel[i][n] = data->Vel[i][n] * data->Hubble / km2cm; } } From 6efe49a1efc0d0253e61973876d88fe33baa02bc Mon Sep 17 00:00:00 2001 From: Your Name Date: Thu, 29 Mar 2012 15:54:34 -0400 Subject: [PATCH 7/8] Machine precision detection --- src/cic.cpp | 205 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/cic.hpp | 35 +++++++++ src/mach.hpp | 18 +++++ 3 files changed, 258 insertions(+) create mode 100644 src/cic.cpp create mode 100644 src/cic.hpp create mode 100644 src/mach.hpp diff --git a/src/cic.cpp b/src/cic.cpp new file mode 100644 index 0000000..71a106d --- /dev/null +++ b/src/cic.cpp @@ -0,0 +1,205 @@ +#include +#include +#include +#include "cic.hpp" + +CICFilter::CICFilter(uint32_t N, double len) +{ + spatialLen = len; + szGrid = N; + totalSize = N*N*N; + densityGrid = new CICType[totalSize]; + resetMesh(); +} + +CICFilter::~CICFilter() +{ + delete[] densityGrid; +} + +void CICFilter::resetMesh() +{ + for (uint32_t i = 0; i < totalSize; i++) + densityGrid[i] = 0; +} + +void CICFilter::putParticles(CICParticles *particles, uint32_t N) +{ +#if 0 + uint32_t numCorners = 1 << NUMDIMS; + + for (uint32_t i = 0; i < N; i++) + { + Coordinates xyz; + int32_t ixyz[NUMDIMS]; + int32_t rxyz[NUMDIMS]; + CICType alpha[NUMDIMS]; + CICType beta[NUMDIMS]; + for (int j = 0; j < NUMDIMS; j++) + { + xyz[j] = (particles[i].coords[j] / spatialLen * szGrid); + ixyz[j] = (int32_t)floor(xyz[j] - 0.5); + beta[j] = xyz[j] - ixyz[j] - 0.5; + alpha[j] = 1 - beta[j]; + if (ixyz[j] < 0) + ixyz[j] = szGrid-1; + } + + CICType tot_mass = 0; + for (int j = 0; j < numCorners; j++) + { + CICType rel_mass = 1; + uint32_t idx = 0; + uint32_t mul = 1; + uint32_t mul2 = 1; + + for (int k = 0; k < NUMDIMS; k++) + { + uint32_t ipos = ((j & mul2) != 0); + + if (ipos == 1) + { + rel_mass *= beta[k]; + } + 
else + { + rel_mass *= alpha[k]; + } + + rxyz[k] = ixyz[k] + ipos; + + if (rxyz[k] >= szGrid) + idx += (rxyz[k] - szGrid) * mul; + else + idx += rxyz[k] * mul; + + mul2 *= 2; + mul *= szGrid; + } + + assert(rel_mass > 0); + assert(rel_mass < 1); + assert(idx < totalSize); + densityGrid[idx] += rel_mass * particles[i].mass; + tot_mass += rel_mass; + } + assert(tot_mass < 1.1); + assert(tot_mass > 0.9); + } +#endif +#if 0 + for (uint32_t i = 0; i < N; i++) + { + Coordinates xyz; + int32_t ixyz[NUMDIMS]; + for (int j = 0; j < NUMDIMS; j++) + { + xyz[j] = (particles[i].coords[j] / spatialLen * szGrid); + ixyz[j] = (int32_t)round(xyz[j] - 0.5); + if (ixyz[j] < 0) + ixyz[j] = szGrid-1; + else if (ixyz[j] >= szGrid) + ixyz[j] = 0; + } + + uint32_t idx = ixyz[0] + ixyz[1] * szGrid + ixyz[2] * szGrid * szGrid; + densityGrid[idx] += particles[i].mass; + } + +#endif + + for (uint32_t i = 0; i < N; i++) + { + CICType x, y, z; + int32_t ix, iy, iz; + int32_t ix2, iy2, iz2; + + x = particles[i].coords[0] / spatialLen * szGrid + 0.5; + y = particles[i].coords[1] / spatialLen * szGrid + 0.5; + z = particles[i].coords[2] / spatialLen * szGrid + 0.5; + + if (x < 0) + x += szGrid; + if (y < 0) + y += szGrid; + if (z < 0) + z += szGrid; + + ix = ((int32_t)floor(x)); + iy = ((int32_t)floor(y)); + iz = ((int32_t)floor(z)); + + ix2 = (ix + 1) % szGrid; + iy2 = (iy + 1) % szGrid; + iz2 = (iz + 1) % szGrid; + + CICType alpha_x = x - ix; + CICType alpha_y = y - iy; + CICType alpha_z = z - iz; + + ix %= szGrid; + iy %= szGrid; + iz %= szGrid; + + assert(alpha_x >= 0); + assert(alpha_y >= 0); + assert(alpha_z >= 0); + + CICType beta_x = 1 - alpha_x; + CICType beta_y = 1 - alpha_y; + CICType beta_z = 1 - alpha_z; + + assert(beta_x >= 0); + assert(beta_y >= 0); + assert(beta_z >= 0); + + CICType mass = particles[i].mass; + uint32_t idx; + + // 000 + idx = ix + (iy + iz * szGrid) * szGrid; + densityGrid[idx] += + mass * beta_x * beta_y * beta_z; + + // 100 + idx = ix2 + (iy + iz * szGrid) * 
szGrid; + densityGrid[idx] += + mass * alpha_x * beta_y * beta_z; + + // 010 + idx = ix + (iy2 + iz * szGrid) * szGrid; + densityGrid[idx] += + mass * beta_x * alpha_y * beta_z; + + // 110 + idx = ix2 + (iy2 + iz * szGrid) * szGrid; + densityGrid[idx] += + mass * alpha_x * alpha_y * beta_z; + + // 001 + idx = ix + (iy + iz2 * szGrid) * szGrid; + densityGrid[idx] += + mass * beta_x * beta_y * alpha_z; + + // 101 + idx = ix2 + (iy + iz2 * szGrid) * szGrid; + densityGrid[idx] += + mass * alpha_x * beta_y * alpha_z; + + // 011 + idx = ix + (iy2 + iz2 * szGrid) * szGrid; + densityGrid[idx] += + mass * beta_x * alpha_y * alpha_z; + + // 111 + idx = ix2 + (iy2 + iz2 * szGrid) * szGrid; + densityGrid[idx] += + mass * alpha_x * alpha_y * alpha_z; + } +} + +void CICFilter::getDensityField(CICType*& field, uint32_t& res) +{ + field = densityGrid; + res = totalSize; +} diff --git a/src/cic.hpp b/src/cic.hpp new file mode 100644 index 0000000..d522d00 --- /dev/null +++ b/src/cic.hpp @@ -0,0 +1,35 @@ +#ifndef __CICFILTER_HPP +#define __CICFILTER_HPP + +#include "CosmoTool/config.hpp" +#include + +using namespace CosmoTool; + +typedef float CICType; + + typedef struct + { + float mass; + Coordinates coords; + } CICParticles; + + class CICFilter + { + public: + CICFilter(uint32_t resolution, double spatialLen); + ~CICFilter(); + + void resetMesh(); + void putParticles(CICParticles *particles, uint32_t N); + + void getDensityField(CICType*& field, uint32_t& res); + + protected: + CICType *densityGrid; + double spatialLen; + uint32_t totalSize; + uint32_t szGrid; + }; + +#endif diff --git a/src/mach.hpp b/src/mach.hpp new file mode 100644 index 0000000..60c98bf --- /dev/null +++ b/src/mach.hpp @@ -0,0 +1,18 @@ +#ifndef __MACHINE_INFORMATION_HPP +#define __MACHINE_INFORMATION_HPP + +template +T mach_epsilon() +{ + T eps = (T)1; + + do + { + eps /= 2; + } + while ((T)(1 + (eps/2)) != (T)1); + + return eps; +} + +#endif From d17fbbb66d8df4a10b7f6698154580179c11e84a Mon Sep 17 00:00:00 
2001 From: Guilhem Lavaux Date: Fri, 30 Mar 2012 11:47:48 -0400 Subject: [PATCH 8/8] Added support for writing gadget files --- src/loadGadget.cpp | 70 ++++++++++++++++++++++++++++++++++++++++++++++ src/loadGadget.hpp | 5 +++- 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/src/loadGadget.cpp b/src/loadGadget.cpp index 47f6bb9..945a5ae 100644 --- a/src/loadGadget.cpp +++ b/src/loadGadget.cpp @@ -273,3 +273,73 @@ SimuData *CosmoTool::loadGadgetMulti(const char *fname, int id, int loadflags, i } + +void CosmoTool::writeGadget(const char *fname, SimuData *data, int GadgetFormat) +{ + UnformattedWrite *f; + int npart[6]; + float mass[6]; + + if (data->Pos[0] == 0 || data->Vel[0] == 0 || data->Id == 0) + return; + + f = new UnformattedWrite(fname); + if (f == 0) + return; + + for (int i = 0; i < 6; i++) + { + npart[i] = 0; + mass[i] = 0; + } + + npart[1] = data->NumPart; + + f->beginCheckpoint(); + for (int i = 0; i < 6; i++) + f->writeInt32(npart[i]); + for (int i = 0; i < 6; i++) + f->writeReal64(mass[i]); + + f->writeReal64(data->time); + f->writeReal64(1/data->time-1); + f->writeInt32(0); + f->writeInt32(0); + + for (int i = 0; i < 6; i++) + f->writeInt32(npart[i]); + f->writeInt32(0); + f->writeInt32(1); + f->writeReal64(data->BoxSize); + f->writeReal64(data->Omega_M); + f->writeReal64(data->Omega_Lambda); + f->writeReal64(data->Hubble); + f->endCheckpoint(); + + f->beginCheckpoint(); + for(int n = 0; n < data->NumPart; n++) { + for (int k = 0; k < 3; k++) + f->writeReal32(data->Pos[k][n]); + } + f->endCheckpoint(); + + float velmul = 1.0; + if (GadgetFormat == 1) + velmul = sqrt(data->time); + + f->beginCheckpoint(); + for(int n = 0; n < data->NumPart; n++) { + for (int k = 0; k < 3; k++) + f->writeReal32(data->Vel[k][n]/velmul); + } + f->endCheckpoint(); + + f->beginCheckpoint(); + for(int n = 0; n < data->NumPart; n++) + { + f->writeReal32(data->Id[n]); + } + f->endCheckpoint(); +} + + diff --git a/src/loadGadget.hpp b/src/loadGadget.hpp index 
e92e21f..69f0ea6 100644 --- a/src/loadGadget.hpp +++ b/src/loadGadget.hpp @@ -9,7 +9,10 @@ namespace CosmoTool { PurePositionData *loadGadgetPosition(const char *fname); SimuData *loadGadgetMulti(const char *fname, int id, int flags, int GadgetFormat = 1); - + + // Only single snapshot supported + void writeGadget(const char *fname, SimuData *data, int GadgetFormat = 1); + }; #endif