From c0ce70fbc025f77c367a5dba2275094fd96e0078 Mon Sep 17 00:00:00 2001 From: Jiri Pittner Date: Wed, 14 Feb 2024 15:33:25 +0100 Subject: [PATCH] included kernel ridge regression module --- .gitignore | 2 + Makefile.am | 8 +- reg.cc | 851 ++++++++++++++++++++++++++ reg.h | 192 ++++++ regsurf.cc | 1526 +++++++++++++++++++++++++++++++++++++++++++++++ regsurf.h | 286 +++++++++ test_reg.cc | 131 ++++ test_regsurf.cc | 118 ++++ 8 files changed, 3111 insertions(+), 3 deletions(-) create mode 100644 reg.cc create mode 100644 reg.h create mode 100644 regsurf.cc create mode 100644 regsurf.h create mode 100644 test_reg.cc create mode 100644 test_regsurf.cc diff --git a/.gitignore b/.gitignore index 421c9a7..e3ef12c 100644 --- a/.gitignore +++ b/.gitignore @@ -54,6 +54,8 @@ t test fi tX +test_reg +test_regsurf *.tar.bz2 .*.swp # CVS default ignores end diff --git a/Makefile.am b/Makefile.am index 293c321..35d386a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,12 +1,14 @@ lib_LTLIBRARIES = libla.la -include_HEADERS = version.h simple.h vecmat3.h quaternion.h fortran.h cuda_la.h auxstorage.h davidson.h laerror.h mat.h qsort.h vec.h bisection.h diis.h la.h noncblas.h smat.h numbers.h bitvector.h fourindex.h la_traits.h la_random.h nonclass.h sparsemat.h sparsesmat.h csrmat.h conjgrad.h gmres.h matexp.h permutation.h polynomial.h contfrac.h graph.h -libla_la_SOURCES = simple.cc quaternion.cc vecmat3.cc vec.cc mat.cc smat.cc sparsemat.cc sparsesmat.cc csrmat.cc laerror.cc noncblas.cc numbers.cc bitvector.cc strassen.cc nonclass.cc cuda_la.cc fourindex.cc permutation.cc polynomial.cc contfrac.cc graph.cc la_random.cc +include_HEADERS = version.h simple.h vecmat3.h quaternion.h fortran.h cuda_la.h auxstorage.h davidson.h laerror.h mat.h qsort.h vec.h bisection.h diis.h la.h noncblas.h smat.h numbers.h bitvector.h fourindex.h la_traits.h la_random.h nonclass.h sparsemat.h sparsesmat.h csrmat.h conjgrad.h gmres.h matexp.h permutation.h polynomial.h contfrac.h graph.h reg.h regsurf.h +libla_la_SOURCES = simple.cc quaternion.cc vecmat3.cc vec.cc mat.cc smat.cc sparsemat.cc sparsesmat.cc csrmat.cc laerror.cc noncblas.cc numbers.cc bitvector.cc strassen.cc nonclass.cc cuda_la.cc fourindex.cc permutation.cc polynomial.cc contfrac.cc graph.cc la_random.cc reg.cc regsurf.cc nodist_libla_la_SOURCES = version.cc -check_PROGRAMS = t test tX +check_PROGRAMS = t test tX test_reg test_regsurf t_SOURCES = t.cc t2.cc #nodist_t_SOURCES = test_SOURCES = test.cc tX_SOURCES = tX.cc +test_reg_SOURCES = test_reg.cc +test_regsurf_SOURCES = test_regsurf.cc LDADD = .libs/libla.a ACLOCAL_AMFLAGS = -I m4 EXTRA_DIST = get_git_version LICENSE doxygen.cfg aminclude.am acinclude.m4 footer.html diff --git a/reg.cc b/reg.cc new file mode 100644 index 0000000..8e7a910 --- /dev/null +++ b/reg.cc @@ -0,0 +1,851 @@ +/* + LA: linear algebra C++ interface library + + Kernel ridge regression module Copyright (C) 2024 + Pavel Florian and Jiri Pittner or + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#include "reg.h" + +namespace LA { +template +T Gaussian_kernel(const T* x, const unsigned int dimensions, const T sigma) + { + unsigned int i; + T result = 0; + + for (i = 0; i < dimensions; i++) + result = result + (x[i] * x[i]); + + result = std::exp(-result/(2 * sigma * sigma)); + return(result); + } + + +template +T Laplacian_kernel(const T* x, const unsigned int dimensions, const T sigma) + { + unsigned int i; + T result = 0; + + for (i = 0; i < dimensions; i++) + result = result + abs(x[i]); + + result = std::exp(-abs(result)/(sigma)); + return(result); + } + + +template +T Exponential_kernel(const T* x, const unsigned int dimensions, const T sigma) + { + unsigned int i; + T result = 0; + + for (i = 0; i < dimensions; i++) + result = result + (x[i] * x[i]); + + result = std::exp(-std::pow(result, 0.5)/sigma); + return(result); + } + + +template +T Matern_kernel(const T* x, const unsigned int dimensions, const T sigma, const unsigned int n, +const T* Matern_comb_number) + { + unsigned int i; + unsigned int k; + T part_1 = 0; // sum(x^2)^1/2 + T exponential_part; + T Matern_part = 0; + T result; + + for (i = 0; i < dimensions; i++) + part_1 = part_1 + (x[i] * x[i]); + + part_1 = std::pow(part_1, 0.5); + exponential_part = std::exp(-part_1/(sigma)); + + for (k = 0; k < n; k++) + Matern_part += Matern_comb_number[i] * std::pow(2 * part_1/sigma, n - k); + + result = exponential_part * Matern_part; + return(result); + } + + +template +unsigned int Matern_coefficient(const unsigned int n, T* comb_number) // values of (n + k)!/(2 n)! * n!/(k!(n-k)!) + { + unsigned int i , j; + unsigned int k; + unsigned int part_1; // reverse value of (n + k)!/(2 n)! + unsigned int part_2; // n!/(n-k)! + unsigned int part_3; // k! + + for (k = 0; k < n; k++) + { + part_1 = 1; + part_2 = 1; + part_3 = 1; + + for (i = n + k + 1; i <= 2 * n; i++) + part_1 = part_1 * i; + + for (i = n - k + 1; i <= n; i++) + part_2 = part_2 * i; + + for (i = 1; i < k; i++) + part_3 = part_3 * i; + + comb_number[k] = T(part_2)/T(part_1 * part_3); + } + return(0); + } + + + +template +REG::REG(const T* x, const T* y, const T* params, const unsigned int data_size, const unsigned int x_dimensions, const string kernel) + { + Init(x, y , params, data_size, x_dimensions, kernel); + } + + +template +REG::REG(const string x_file, const string y_file, const string param_file) + { + unsigned int i; + ifstream xfile(x_file); + ifstream yfile(y_file); + ifstream paramfile(param_file); + string line; + T line_value; + // parameters of model + unsigned int data_size; + unsigned int x_dimensions; + string kernel; + + REG_param_file = new T[4]; + if(!REG_param_file) laerror("allocation failure in REG constructor"); + // loading parameters for regression model from param_file + if (paramfile.is_open()) + { + if (getline(paramfile, line)) + data_size = stoi(line); + else + return; + + if (getline(paramfile, line)) + x_dimensions = stoi(line); + else + return; + + if (getline(paramfile, line)) + kernel = line; + else + return; + + if (getline(paramfile, line)) + REG_param_file[0] = stod(line); + else + return; + + if (getline(paramfile, line)) + REG_param_file[1] = stod(line); + else + return; + + if (getline(paramfile, line)) + REG_param_file[2] = stod(line); + else + return; + + if (kernel == "Matern") + { + if (getline(paramfile, line)) + REG_param_file[3] = static_cast(stod(line) + 0.5); + else + return; + } + } + REG_x_file = new T[data_size * x_dimensions]; + REG_y_file = new T[data_size]; + if(!REG_x_file || !REG_y_file) 
laerror("allocation failure 2 in REG constructor"); + + // loading x and y data for regression model from files + if (xfile.is_open() and yfile.is_open()) + { + for (i = 0; i < data_size * x_dimensions; i++) + { + if (getline(xfile, line)) + { + line_value = stod(line); + REG_x_file[i] = line_value; + } + else + return; + } + for (i = 0; i < data_size; i++) + { + if (getline(yfile, line)) + { + line_value = stod(line); + REG_y_file[i] = line_value; + } + else + return; + } + xfile.close(); + yfile.close(); + paramfile.close(); + } + else + return; + + Init(REG_x_file, REG_y_file, REG_param_file, data_size, x_dimensions, kernel); + return; + } + + +template +void REG::Init(const T* x, const T* y, const T* params, const unsigned int data_size, const unsigned int x_dimensions, +const string kernel) + { + unsigned int i; + // Setting type of kernel + REG_kernel_type = kernel; + if (kernel != "Gaussian" and kernel != "Laplacian" and kernel != "Exponential" and kernel != "Matern") + return; + if (kernel == "Gaussian") + REG_kernel_id = 0; + if (kernel == "Laplacian") + REG_kernel_id = 1; + if (kernel == "Exponential") + REG_kernel_id = 2; + if (kernel == "Matern") + REG_kernel_id = 3; + // copy parameters and data + REG_lambda= params[0]; + REG_alpha = params[1]; + if (REG_alpha > 1) + REG_alpha = 1; + if (REG_alpha < 0) + REG_alpha = 0; + REG_sigma = params[2]; + if (kernel == "Matern") + REG_n = static_cast(params[3] + 0.5); + + REG_count_kernels = data_size; + REG_count_dimensions = x_dimensions; + REG_aloc_init = false; + { + REG_params = new T[4]; + if(!REG_params) laerror("allocation failure in REG::Init"); + if (REG_kernel_id == 3) + { + for (i = 0; i < 4; i++) + REG_params[i] = params[i]; + REG_Matern_comb_number_vec = NRVec(REG_n); + REG_Matern_comb_number = REG_Matern_comb_number_vec.operator T*(); + } + else + { + for (i = 0; i < 3; i++) + REG_params[i] = params[i]; + + REG_Matern_comb_number_vec = NRVec(1); + REG_Matern_comb_number = REG_Matern_comb_number_vec.operator T*(); + } + } + + // copy data + REG_coordinates_vec = NRVec(x, data_size * REG_count_dimensions); + REG_coordinates = REG_coordinates_vec.operator T*(); + REG_y_vec = NRVec(y , data_size); + REG_Y = REG_y_vec.operator T*(); + // for fitting model + REG_distances_mat = NRMat(data_size, data_size); + REG_distances = REG_distances_mat.operator T*(); + REG_kernels_mat = NRMat(data_size, data_size); + REG_kernels = REG_kernels_mat.operator T*(); + REG_kernels_sums_vec = NRVec(data_size); + REG_kernels_sums = REG_kernels_sums_vec.operator T*(); + REG_weights_vec = NRVec(data_size); + REG_weights = REG_weights_vec.operator T*(); + // for predictions + REG_coordinates_diff_vec = NRVec(data_size); + REG_coordinates_diff = REG_coordinates_diff_vec.operator T*(); + REG_kernels_diff_vec = NRVec(data_size); + REG_kernels_diff = REG_kernels_diff_vec.operator T*(); + REG_y_preds_vec = NRVec(data_size); + REG_y_preds = REG_y_preds_vec.operator T*(); + REG_y_delta_vec = NRVec(data_size); + REG_y_delta = REG_y_delta_vec.operator T*(); + REG_y_temp_vec = NRVec(data_size); + REG_y_temp = REG_y_temp_vec.operator T*(); + + REG_aloc_init = true; + + // calculating Matern coefficients, if Matern kernel is selected + if (REG_kernel_id == 3) + Matern_coefficient(REG_n, REG_Matern_comb_number); + + REG_base_init = true; + return; + } + + + +template +T REG::Kernel(const T* x, const unsigned int dimensions, const unsigned int kernel_id) + { + T result; + + switch (kernel_id) { + case 0: + result = Gaussian_kernel(x, dimensions, REG_sigma); + 
break; + case 1: + result = Laplacian_kernel(x, dimensions, REG_sigma); + break; + case 2: + result = Laplacian_kernel(x, dimensions, REG_sigma); + break; + case 3: + result = Matern_kernel(x, dimensions, REG_sigma, REG_n, REG_Matern_comb_number); + break; + } + return(result); + } + + +template +unsigned int REG::Build_REG_distances_mat() + { + unsigned int i, j, k; + unsigned int mat_dimension, vec_dimension; + // check the initialization + if (REG_base_init == false) + { + laerror("regression model not inicialized"); + return(1); + } + mat_dimension = REG_count_kernels; + vec_dimension = REG_count_dimensions; + // building the matrix of vectors coordinates differences between kernels + for (i = 0; i < mat_dimension; i++) + { + for (j = i + 1; j < mat_dimension; j++) + for (k = 0; k < vec_dimension; k++) + { + REG_distances[(i * (mat_dimension) + j) * vec_dimension + k] = REG_coordinates[i * vec_dimension + k] - + REG_coordinates[j * vec_dimension + k]; + } + } + // copy calculated values under diagonal + for (i = 0; i < mat_dimension; i++) + { + for (j = i + 1; j < mat_dimension; j++) + for (k = 0; k < vec_dimension; k++) + { + REG_distances[(j * (mat_dimension) + i) * vec_dimension + k] = + REG_distances[(i * (mat_dimension) + j) * vec_dimension + k]; + } + } + // copy 0 to diagonal + for (i = 0; i < mat_dimension; i++) + for (k = 0; k < vec_dimension; k++) + REG_distances[(i * (mat_dimension + 1) * vec_dimension) + k] = 0; + + return(0); + } + + +template +unsigned int REG::Build_REG_kernels_mat() + { + unsigned int i, j, k; + unsigned int mat_dimension, vec_dimension; + // check the initialization + if (REG_base_init == false) + { + laerror("regression model not inicialized"); + return(1); + } + // copy class parameters + mat_dimension = REG_count_kernels; + vec_dimension = REG_count_dimensions; + // crete kernel coefficients matrix for gaussian kernels + for (i = 0; i < mat_dimension; i++) + for (j = i + 1; j < mat_dimension; j++) + { + REG_kernels[i * mat_dimension + j] = Kernel(®_distances[(i * (mat_dimension) + j) * vec_dimension], + vec_dimension, REG_kernel_id); + } + // copy calculated values under diagonal + for (i = 0; i < mat_dimension; i++) + for (j = i + 1; j < mat_dimension; j++) + REG_kernels[(j * (mat_dimension) + i)] = REG_kernels[(i * (mat_dimension) + j)]; + + // copy 1 to diagonal + for (i = 0; i < mat_dimension; i++) + REG_kernels[i * (mat_dimension + 1)] = 1; + + return(0); + } + + +template +unsigned int REG::Build_REG_kernels_mat_sums() + { + unsigned int i, j; + unsigned int count_kernels; + T sum; + + count_kernels = REG_count_kernels; + for (i = 0; i < count_kernels; i++) + { + sum = 0; + for (j = 0; j < count_kernels; j++) + sum += REG_kernels[(i * count_kernels) + j]; + + REG_kernels_sums[i] = sum; + } + return(0); + } + + +template +T REG::REG_RMSE() + { + T delta_square; + // check the initialization + if (REG_base_init == false) + { + laerror("regression model not inicialized"); + return(1); + } + T y_max = REG_y_vec.amax(); + T y_min = REG_y_vec.amin(); + + REG_y_delta_vec |= REG_y_preds_vec; + REG_y_delta_vec -= REG_y_vec; + REG_y_delta_vec *= REG_y_delta_vec; + delta_square = REG_y_delta_vec.sum(); + delta_square = delta_square/REG_count_kernels; + delta_square = std::sqrt(delta_square); + return(delta_square); + } + + +template +T REG::Loss_Function() + { + unsigned int count_x; + T RMSE; + // check the initialization + if (REG_base_init == false) + { + laerror("regression model not inicialized"); + return(1); + } + // calculating the root mean 
square error - RMSE + RMSE = REG_RMSE(); + return (RMSE); + } + + +template +unsigned int REG::REG_Fit(const T max_Loss, const unsigned int max_learning_cycles) + { // the kernel ridge learning algorithm + unsigned int i, j; + T Loss; + T Lasso_part; + T kernel_ridge_part; + unsigned int count_kernels; + + // check the initialization + if (REG_base_init == false) + { + laerror("regression model not inicialized"); + return(1); + } + // checking the input parameters + if (max_learning_cycles == 0) + { + laerror("zero learning cycles"); + return(1); + } + if (max_Loss < 0) + { + laerror("negative value of maximum Loss function"); + return(1); + } + Lasso_part = REG_lambda * REG_alpha; + kernel_ridge_part = REG_lambda * (1 - REG_alpha) * 0.5; + count_kernels = REG_count_kernels; + + // initialization of REG_distances_mat and REG_kernels_mat if necessary + if (REG_mat_init == false) + { + Build_REG_distances_mat(); + Build_REG_kernels_mat(); + Build_REG_kernels_mat_sums(); + REG_mat_init = true; + } + // initialitation of weight coefficients vector + REG_weights_vec |= REG_y_vec; + // Linear learning cycle of weight coefficient for learning + for (i = 0; i < max_learning_cycles; i++) + { + // calculating preds_vec vector of predicted y values + REG_y_preds_vec.gemv(0, REG_kernels_mat, 'n', 1, REG_weights_vec); + if (kernel_ridge_part != 0) + for (j = 0; j < REG_count_kernels; j++) + REG_y_preds[j] *= (REG_kernels_sums[j] + kernel_ridge_part)/REG_kernels_sums[j]; + + if (Lasso_part != 0) + REG_y_preds_vec += Lasso_part; + + // calculating new weights + REG_y_temp_vec |= REG_y_vec; + REG_y_temp_vec /= REG_y_preds_vec; + REG_weights_vec *= REG_y_temp_vec; + // checking the Loss_Function value + if (max_Loss > 0) + { + Loss = Loss_Function(); + if (Loss < max_Loss) + break; + } + } + return(0); + } + + +template +unsigned int REG::REG_Fit_grad(const T max_Loss, const unsigned int max_learning_cycles, const T learning_rate) + { + unsigned int i, j, k; + unsigned int count_x; + T Loss; + T delta_RMSE; + // for optimizing calculation of Kernel ridge part of Loss function difference + T kernel_ridge_part; + T delta_KRR_part; + // for optimizing calculation of Lasso part of Loss function difference + T Lasso_part; + T delta_Lasso_part; + T sqrt_count_x; + + count_x = REG_count_kernels; + // check the initialization + if (REG_base_init == false) + { + laerror("regression model not inicialized"); + return(1); + } + // Checking the input parameters + if (max_learning_cycles == 0) + { + laerror("zero learning cycles"); + return(1); + } + if (max_Loss < 0) + { + laerror("negative value of maximum Loss function"); + return(1); + } + // initialization of REG_distances_mat and REG_kernels_mat if necessary + if (REG_mat_init == false) + { + Build_REG_distances_mat(); + Build_REG_kernels_mat(); + Build_REG_kernels_mat_sums(); + REG_mat_init = true; + } + // initialitation of weight coefficients vector + REG_weights_vec |= REG_y_vec; + kernel_ridge_part = REG_lambda * (1.00 - REG_alpha); + Lasso_part = REG_lambda * REG_alpha; + sqrt_count_x = std::sqrt(count_x); + // gradient descent Learning cycle of weight coefficient for learning + for (i = 0; i < max_learning_cycles; i++) + { + // calculating preds_vec vector of predicted y values + REG_y_preds_vec.gemv(0, REG_kernels_mat, 'n', 1, REG_weights_vec); + if (kernel_ridge_part != 0) + for (j = 0; j < count_x; j++) + REG_y_preds[j] *= (REG_kernels_sums[j] + kernel_ridge_part)/REG_kernels_sums[j]; + + if (Lasso_part != 0) + REG_y_preds_vec += Lasso_part; + // 
calculating difference of root mean square error - RMSE + for (j = 0; j < count_x; j++) + { + delta_RMSE = 0; + for (k = 0; k < count_x; k++) + delta_RMSE += (REG_y_preds[k] - REG_Y[k]) * REG_kernels[j * count_x + k]; + + delta_RMSE /=sqrt_count_x; + // calculating new weights + REG_weights[j] /= (1 - learning_rate * delta_RMSE); + } + // convergence criteria check + if (max_Loss > 0) + { + Loss = Loss_Function(); + if (Loss < max_Loss) + break; + } + } + return(0); + } + + +template +unsigned int REG::REG_Save_fitted(const string model_file) + { + unsigned int i; + ofstream modelfile(model_file); + string line; + + // checking the initialization + if (REG_base_init == false) + { + laerror("regression model not inicialized"); + return(1); + } + // save model to file + if (modelfile.is_open()) + for (i = 0; i < REG_count_kernels; i++) + { + line = to_string(REG_weights[i]); + modelfile << line << '\n'; + } + modelfile.close(); + + return(0); + } + + +template +unsigned int REG::REG_Load_fitted(const string model_file) + { + unsigned int i; + ifstream modelfile(model_file); + string line; + T line_value; + // check the initialization + if (REG_base_init == false) + { + laerror("regression model not inicialized"); + return(1); + } + // initialization of REG_distances_mat and REG_kernels_mat if necessary + if (REG_mat_init == false) + { + Build_REG_distances_mat(); + Build_REG_kernels_mat(); + REG_mat_init = true; + } + // load stored weights + if (modelfile.is_open()) + { + for (i = 0; i < REG_count_kernels; i++) + if (getline(modelfile, line)) + { + line_value = stod(line); + REG_weights[i] = line_value; + } + REG_y_preds_vec.gemv(0, REG_kernels_mat, 'n', 1, REG_weights_vec); + } + return(0); + } + + +template +unsigned int REG::REG_Get_predictions(const T* x, T* y_preds, const unsigned int data_size) + { + unsigned int i, j, k; + T pred_value; + T Lasso_part; + T kernel_ridge_part; + T sum_kernels_diff; + T weights_sum; + // check the initialization + if (REG_base_init == false) + { + laerror("regression model not inicialized"); + return(1); + } + Lasso_part = REG_lambda * REG_alpha; + kernel_ridge_part = REG_lambda * (1 - REG_alpha) * 0.5; + // loop for predicting new values for each x + for (i = 0; i < data_size; i++) // For every x value + { + pred_value = 0; + // calculating the REG_coordinates_vec + for (j = 0; j < REG_count_kernels; j++) // For every kernel + { + for (k = 0; k < REG_count_dimensions; k++) // For every x coordinate + { + REG_coordinates_diff[(j * REG_count_dimensions) + k] = x[(i * REG_count_dimensions) + k] - + REG_coordinates[(j * REG_count_dimensions) + k]; + } + } + // calculating the REG_kernels_vec + if (kernel_ridge_part != 0) + { + sum_kernels_diff = 0; + for (j = 0; j < REG_count_kernels; j++) // For every kernel + { + REG_kernels_diff[j] = Kernel(®_coordinates_diff[(j * REG_count_dimensions)], REG_count_dimensions, REG_kernel_id); + pred_value += REG_kernels_diff[j] * REG_weights[j]; + sum_kernels_diff += REG_kernels_diff[j]; + } + if (sum_kernels_diff != 0) + pred_value *= (sum_kernels_diff + kernel_ridge_part)/sum_kernels_diff; + } + else + for (j = 0; j < REG_count_kernels; j++) // For every kernel + { + REG_kernels_diff[j] = Kernel(®_coordinates_diff[(j * REG_count_dimensions)], REG_count_dimensions, REG_kernel_id); + pred_value += REG_kernels_diff[j] * REG_weights[j]; + } + pred_value += Lasso_part; + // storing y prediction into memory + y_preds[i] = pred_value; + } + return(0); + } + + +template +unsigned int REG::REG_Get_Score(const T* x, T* y_preds, 
const unsigned int data_size, const T* y, T* score) + { + unsigned int i; + T prediction; + // check the initialization + if (REG_base_init == false) + { + laerror("regression model not inicialized"); + return(1); + } + REG_Get_predictions(x, y_preds, data_size); + prediction = 0; + for (i = 0; i < data_size; i++) + prediction += (y[i] - y_preds[i]) * (y[i] - y_preds[i]); + + score[0] = prediction; + return(0); + } + + +template +unsigned int REG::REG_Set_params(const T* params, const string kernel) + { + // setting type of kernel + REG_kernel_type = kernel; + if (kernel == "Gaussian") + REG_kernel_id = 0; + if (kernel == "Laplacian") + REG_kernel_id = 1; + if (kernel == "Exponential") + REG_kernel_id = 2; + if (kernel == "Matern") + REG_kernel_id = 3; + // copy parameters and data + REG_lambda = params[0]; + REG_alpha = params[1]; + if (REG_alpha > 1) + REG_alpha = 1; + if (REG_alpha < 0) + REG_alpha = 0; + REG_sigma = params[2]; + if (kernel == "Matern") + REG_n = static_cast(params[3] + 0.5); + + // calculating Matern coefficients, if Matern kernel is selected + if (REG_kernel_id == 3) + { + REG_Matern_comb_number_vec.resize(REG_n); + REG_Matern_comb_number = REG_Matern_comb_number_vec.operator T*(); + Matern_coefficient(REG_n, REG_Matern_comb_number); + } + return(0); + } + + +template +unsigned int REG::REG_Get_params(T* params, string* kernel) + { + kernel[0] = REG_kernel_type; + params[0] = REG_lambda; + params[1] = REG_alpha; + params[2] = REG_sigma; + + if (REG_kernel_type == "Mattern") + params[3] = REG_n; + + return(0); + } + + +template +unsigned int REG::REG_Get_weight_coeff(T* weight) + { + unsigned int i; + + for (i = 0; i < REG_count_kernels; i++) + weight[i] = REG_weights[i]; + + return(0); + } + + +template +REG::~REG() + { + // dealocating memory to avoid memory leaks + if (REG_params != nullptr) + delete[] REG_params; + if (REG_x_file != nullptr) + delete[] REG_x_file; + if (REG_y_file != nullptr) + delete[] REG_y_file; + if (REG_param_file != nullptr) + delete[] REG_param_file; + if (REG_fit_file != nullptr) + delete[] REG_fit_file; + } + +//enforce instantiation + +#define INSTANTIZE(T) \ +template T Gaussian_kernel(const T* x, const unsigned int dimensions, const T sigma); \ +template T Laplacian_kernel(const T* x, const unsigned int dimensions, const T sigma); \ +template T Exponential_kernel(const T* x, const unsigned int dimensions, const T sigma); \ +template T Matern_kernel(const T* x, const unsigned int dimensions, const T sigma, const unsigned int n, const T* Matern_comb_number); \ +template unsigned int Matern_coefficient(const unsigned int n, T* comb_number); \ + + +INSTANTIZE(double) + +} // end of namespace +template class REG; diff --git a/reg.h b/reg.h new file mode 100644 index 0000000..c41f68b --- /dev/null +++ b/reg.h @@ -0,0 +1,192 @@ +/* + LA: linear algebra C++ interface library + Kernel ridge regression module Copyright (C) 2024 + Pavel Florian and Jiri Pittner or + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. 
If not, see . +*/ +#include +#include +#include +#include +#include +using namespace std; +# include "mat.h" // vector support libla implementation +# include "vec.h" // compiler parameters: -llapack -lblas + +# include "la.h" +# ifndef REG_H +# define REG_H + +namespace LA { + +template +T Gaussian_kernel(const T* x, const unsigned int dimensions, const T sigma); + +template +T Laplacian_kernel(const T* x, const unsigned int dimensions, const T sigma); + +template +T Exponential_kernel(const T* x, const unsigned int dimensions, const T sigma); + +template +T Matern_kernel(const T* x, const unsigned int dimensions, const T sigma, const unsigned int n, const T* Matern_comb_number); + +template +unsigned int Matern_coefficient(const unsigned int n, T* comb_number); + + + +// The kernel regression of points in 1D (curve), 2D (surface), 3D and multi-D space +template +class REG { // Kernel regression (REG) library +public: + // Constructor of kernel regression model from memory location + REG(const T* x, const T* y, const T* params, const unsigned int data_size, const unsigned int x_dimensions, const string kernel); + // Constructor of kernel regression model from files + REG(const string x_file, const string y_file, const string param_file); + // Init function for constructor + void Init(const T* x, const T* y, const T* params, const unsigned int data_size, const unsigned int x_dimensions, const string kernel); + // General kernel function + inline T Kernel(const T* x, const unsigned int dimensions, const unsigned int kernel_id); + // Function, that build matrix of distances between kernels + unsigned int Build_REG_distances_mat(); + // Function, that build matrix of coefficients for kernel coordinates + unsigned int Build_REG_kernels_mat(); + // Function, that build sums of matrix coefficients for kernel coordinates + unsigned int Build_REG_kernels_mat_sums(); + // Function, that return root mean square error of model + T REG_RMSE(); // sum(delta_y) + // The Loss function + T Loss_Function(); // sum(delta_y) + lambda((1 - alpha)/2 * sum(weight^2) + alpha * sum(abs(weight))) + + // Function, that learn kernels and set weights + unsigned int REG_Fit(const T max_Loss, const unsigned int max_learning_cycles); + // Function, that learn kernels and set weights by gradient descent method + unsigned int REG_Fit_grad(const T max_Loss, const unsigned int max_learning_cycles, const T learning_rate); + // Function, that store fitted parameters of regression model to file + unsigned int REG_Save_fitted(const string model_file); + // Function, that load fitted parameters of model to file + unsigned int REG_Load_fitted(const string model_file); + // Function, that uses the kernel regression model to predict y for new data + unsigned int REG_Get_predictions(const T* x, T* y_preds, const unsigned int data_size); + // Function, that uses the kernel regression model to predict y for new data and investigate the model precision + unsigned int REG_Get_Score(const T* x, T* y_preds, const unsigned int data_size, const T* y, T* score); + // Setting parameters of kernel regression model + unsigned int REG_Set_params(const T* params, const string kernel); + // Getting parameters of kernel regression model + unsigned int REG_Get_params(T* params, string* kernel); + // Getting weight coefficients of kernels in regression model + unsigned int REG_Get_weight_coeff(T* weight); + // destructor of kernel regression model + ~REG(); + +private: + // string with type of kernel + string REG_kernel_type; + // number of type of 
kernel + unsigned int REG_kernel_id; + // Count of x items (dimensions) for kernels + unsigned int REG_count_dimensions; + // number of input x values/kernels + unsigned int REG_count_kernels; + // lambda parameter of regression + T REG_lambda; + // alpha parameter of regression + T REG_alpha; + // sigma parameter of kernels + T REG_sigma; + // n for Matern kernels + unsigned int REG_n; + // indicator of initialization or initialization errors + bool REG_base_init = false; + // indicator of initialization of allocation of memory + bool REG_aloc_init = false; + // indicator of initialization of REG_REG_distances_mat and REG_REG_kernels_mat + bool REG_mat_init = false; + + // Matern kernel coefficient computed from n and k + T* REG_Matern_comb_number = nullptr; + // pointer to parameters of kernel regression, various size, first is lambda, second alpha, third sigma parameter of kernels + // and fourth the n parameter, if is used Matern kernel + T* REG_params = nullptr; + // variable which contains number of learning cycles of model + unsigned int learning_cycles = 0; + + // x values of kernel regression input data + T* REG_coordinates = nullptr; + // y values of kernel regression input data + T* REG_Y = nullptr; + + // matrix of distances between kernels + T* REG_distances = nullptr; + // matrix of kernel coefficients for kernel coordinates + T* REG_kernels = nullptr; + // vector of row sums of kernel coefficients for kernel coordinates + T* REG_kernels_sums = nullptr; + // vector of weight coefficients for each kernel + T* REG_weights = nullptr; + + // vector of distances between kernels + T* REG_coordinates_diff = nullptr; + // vector of kernel coefficients for kernel coordinates + T* REG_kernels_diff = nullptr; + // vector of y predictions for each kernel + T* REG_y_preds = nullptr; + // vector of differences between y values and predictions for each kernel + T* REG_y_delta = nullptr; + // vector of temporary results for processing y values and predictions for each kernel + T* REG_y_temp = nullptr; + + // pointer to x buffer for model loaded from file + T* REG_x_file = nullptr; + // pointer to y buffer for model loaded from file + T* REG_y_file = nullptr; + // pointer to parameters buffer for model loaded from file + T* REG_param_file = nullptr; + // pointer to fitted data for model loaded from file + T* REG_fit_file = nullptr; + + // NRVec for Matern kernel coefficient computed from n and k + NRVec REG_Matern_comb_number_vec; + + // NRVec for matrix of distances between kernels + NRVec REG_coordinates_vec; + // NRVec for vector of y imput values of kernels + NRVec REG_y_vec; + // NRMat for matrix of distances between kernels + NRMat REG_distances_mat; + // NRMat for matrix of kernel coefficients for kernel coordinates + NRMat REG_kernels_mat; + // NRVec for vector of row sums of kernel coefficients for kernel coordinates + NRVec REG_kernels_sums_vec; + // NRVec for vector of weight coefficients of kernels + NRVec REG_weights_vec; + + // NRVec for matrix of coordinates differences between kernels + NRVec REG_coordinates_diff_vec; + // NRVec for matrix of kernel coefficients for kernel coordinates + NRVec REG_kernels_diff_vec; + + // NRVec for vector of y predictions of kernels + NRVec REG_y_preds_vec; + // NRVec for vector of differences between estimated real and real y values for training kernels + NRVec REG_y_delta_vec; + // NRVec for temporary results for processing y values and predictions for each kernel + NRVec REG_y_temp_vec; +}; +// Include the template classes definitions + +} 
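+/*
+ Usage sketch (illustrative only, not part of the library API; see
+ test_reg.cc in this patch for a complete test program). It assumes a
+ small one-dimensional data set and a Gaussian kernel; all numeric
+ values below are placeholders.
+
+ using namespace LA;
+
+ // five training points in one dimension, y = x*x
+ double x[5] = {0.0, 1.0, 2.0, 3.0, 4.0};
+ double y[5] = {0.0, 1.0, 4.0, 9.0, 16.0};
+ // params[0] = lambda, params[1] = alpha, params[2] = sigma
+ // (params[3] = n is read only for the "Matern" kernel)
+ double params[3] = {1e-6, 0.5, 1.0};
+
+ REG<double> model(x, y, params, 5, 1, "Gaussian");
+ model.REG_Fit(1e-8, 1000); // stop when Loss < 1e-8 or after 1000 cycles
+
+ double x_new[2] = {1.5, 2.5};
+ double y_pred[2];
+ model.REG_Get_predictions(x_new, y_pred, 2);
+*/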
// end of namespace +# endif /* REG_H */ diff --git a/regsurf.cc b/regsurf.cc new file mode 100644 index 0000000..e71a04b --- /dev/null +++ b/regsurf.cc @@ -0,0 +1,1526 @@ +/* + LA: linear algebra C++ interface library + Kernel ridge regression module Copyright (C) 2024 + Pavel Florian and Jiri Pittner or + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include "regsurf.h" + +namespace LA { + +template +unsigned int Build_s_RE_descriptor_mat(const unsigned int count_particles, +const unsigned int count_geometries, const T* distances, T* desc, const T* reference_geometry) + { + unsigned int i, j, k; + T distance; + T distance_ref; + // the RE descriptor + for (i = 0; i < count_geometries; i++) + { + // calculate upper part of descriptor matrix + for (j = 0; j < count_particles; j++) + for (k = j + 1; k < count_particles; k++) + { + distance = distances[(i * count_particles * count_particles) + (j * count_particles) + k]; + distance_ref = reference_geometry[(j * count_particles) + k]; + desc[(i * count_particles * count_particles) + (j * count_particles) + k] = distance_ref/distance; + } + // copy 0 to diagonal + for (j = 0; j < count_particles; j++) + desc[(i * count_particles * count_particles) + j * (count_particles + 1)] = 0; + // copy upper part of descriptor matrix under diagonal + for (j = 0; j < count_particles; j++) + for (k = 0; k < j; k++) + desc[(i * count_particles * count_particles) + (j * count_particles) + k] = + desc[(i * count_particles * count_particles) + (k * count_particles) + j]; + } + return(0); + } + + +template +unsigned int Build_s_coulomb_matrix_descriptor_mat(const unsigned int count_particles, +const unsigned int count_geometries, const T* distances, T* desc, const unsigned int* Z) + { + unsigned int i, j, k; + T distance; + // the coulomb matrix descriptor + for (i = 0; i < count_geometries; i++) + { + // calculate upper part of descriptor matrix + for (j = 0; j < count_particles; j++) + for (k = j + 1; k < count_particles; k++) + { + distance = distances[(i * count_particles * count_particles) + (j * count_particles) + k]; + desc[(i * count_particles * count_particles) + (j * count_particles) + k] = (Z[j] * Z[k])/distance; + } + // calculate the diagonal + for (j = 0; j < count_particles; j++) + desc[(i * count_particles * count_particles) + j * (count_particles + 1)] = std::pow(Z[j], 2.4); + // copy upper part of descriptor matrix under diagonal + for (j = 0; j < count_particles; j++) + for (k = 0; k < j; k++) + desc[(i * count_particles * count_particles) + (j * count_particles) + k] = + desc[(i * count_particles * count_particles) + (k * count_particles) + j]; + } + return(0); + } + + +template +unsigned int Build_s_inverse_pairs_descriptor_mat(const unsigned int count_particles, +const unsigned int count_geometries, const T* distances, T* desc) + { + unsigned int i, j, k; + T distance; + // the inverse pairs descriptor + for (i = 0; i < count_geometries; i++) + { + // calculate upper part of descriptor 
matrix + for (j = 0; j < count_particles; j++) + for (k = 0; k < count_particles; k++) + { + distance = distances[(i * count_particles * count_particles) + (j * count_particles) + k]; + desc[(i * count_particles * count_particles) + (j * count_particles) + k] = 1.00/distance; + } + // copy 0 to diagonal + for (j = 0; j < count_particles; j++) + desc[(i * count_particles * count_particles) + j * (count_particles + 1)] = 0; + // copy upper part of descriptor matrix under diagonal + for (j = 0; j < count_particles; j++) + for (k = 0; k < j; k++) + desc[(i * count_particles * count_particles) + (j * count_particles) + k] = + desc[(i * count_particles * count_particles) + (k * count_particles) + j]; + } + return(0); + } + + +// The kernel regression of surfaces of geometries in in 1D, 2D, 3D and multi-D space +// and computing of ML-NEA absorption spectra + +template +REGSURF::REGSURF(const unsigned int count_dimensions, const unsigned int count_kernels, const unsigned int count_geometries, +const unsigned int count_levels, const string descriptor, const string kernel, const T* params, const T* geometries, +const T* values, const unsigned int* Z) + { + Init(count_dimensions, count_kernels, count_geometries, count_levels, descriptor, kernel, params, geometries, values, Z); + } + + +template +REGSURF::REGSURF(const string geometries_file, const string y_file, const string param_file, const string Z_file) + { + unsigned int i, j, k, l; + unsigned int line_size, numbers_size; + ifstream geomfile(geometries_file); + ifstream yfile(y_file); + ifstream paramfile(param_file); + ifstream Zfile(Z_file); + string line; + T line_value; + // parameters of model + unsigned int count_dimensions; + unsigned int count_kernels; + unsigned int count_geometries; + unsigned int count_levels; + string kernel; + string descriptor; + const string numbers = "+-0123456789.,"; + string readed_number; + bool read_switch; + bool decimal_switch; + + + // loading parameters for regression model from parameters_file + if (paramfile.is_open()) + { + // load fixed paraeters of model + if (getline(paramfile, line)) + count_dimensions = stoi(line); + else + { + laerror("file with model paramaters not valid"); + return; + } + if (getline(paramfile, line)) + count_kernels = stoi(line); + else + + { + laerror("file with model paramaters not valid"); + return; + } + if (getline(paramfile, line)) + count_geometries = stoi(line); + else + { + laerror("file with model paramaters not valid"); + return; + } + if (getline(paramfile, line)) + count_levels = stoi(line); + else + { + laerror("file with model paramaters not valid"); + return; + } + // load flexible parameters of model + if (getline(paramfile, line)) + descriptor = line; + else + { + laerror("file with model paramaters not valid"); + return; + } + if (getline(paramfile, line)) + kernel = line; + else + { + laerror("file with model paramaters not valid"); + return; + } + // alloacate memory for parameters file + { + s_param_file = new T[count_levels * 2 + 2]; + if(!s_param_file) laerror("not allocated memory for parameters"); + } + + for (i = 0; i < ((2 * count_levels) + 1); i++) + { + if (getline(paramfile, line)) + s_param_file[i] = stod(line); + else + { + laerror("file with model paramaters not valid"); + return; + } + } + + if (kernel == "Matern") + { + if (getline(paramfile, line)) + s_param_file[(2 * count_levels) + 1] = static_cast(stod(line) + 0.5); + else + { + laerror("file with model paramaters not valid"); + return; + } + } + paramfile.close(); + } + // alocating 
memory for y data for regression model from files + { + s_geom_file = new T[count_dimensions * count_kernels * count_geometries]; + if(!s_geom_file) laerror("allocation error of s_geom_file"); + s_y_file = new T[count_geometries * count_levels]; + if(!s_y_file) laerror("allocation error of s_y_file"); + if (descriptor == "Coulomb_matrix" or Z_file != "") + { + s_Z_file = new unsigned int[count_kernels]; + if(!s_Z_file) laerror("allocation error of s_Z_file"); + } + else s_Z_file=NULL; + s_y_min = new unsigned int[count_levels]; + if(!s_y_min) laerror("allocation error of s_y_min"); + } + // loading geometrical data for regression model from file + l = 0; + numbers_size = numbers.size(); + if (geomfile.is_open()) + { + for (i = 0; i < 2 * count_dimensions * count_kernels * count_geometries; i++) + { + if (getline(geomfile, line)) + { + read_switch = false; + decimal_switch = false; + readed_number = ""; + line_size = line.size(); + for (j = 0; j < line_size; j++) + { + read_switch = false; + for (k = 0; k < numbers_size; k++) + { + if (line[j] == numbers[k]) + { + readed_number.append(line, j, 1); + read_switch = true; + if (line[j] == numbers[12] or line[j] == numbers[13]) + decimal_switch = true; + } + } + if (read_switch == false) + { + if (decimal_switch == true and readed_number != "") + { + line_value = stod(readed_number); + s_geom_file[l] = line_value; + l++; + } + readed_number = ""; + read_switch = false; + decimal_switch = false; + } + } + if (decimal_switch == true and readed_number != "") + { + line_value = stod(readed_number); + s_geom_file[l] = line_value; + l++; + } + } + if (l == count_dimensions * count_kernels * count_geometries) + break; + } + geomfile.close(); + } + else + { + laerror("file with geometries not open"); + return; + } + + if (l < count_dimensions * count_kernels * count_geometries) + { + laerror("incomplete file with geometries"); + return; + } + // loading y data for model from file + if (yfile.is_open()) + { + for (i = 0; i < count_geometries * count_levels; i++) + { + if (getline(yfile, line)) + { + line_value = stod(line); + s_y_file[i] = line_value; + } + else + { + yfile.close(); + laerror("incomplete file with y values"); + return; + } + } + } + else + { + laerror("file with y values not open"); + return; + } + // loading Z proton numbers of atoms + if (s_Z_file != nullptr and Z_file != "") + { + if (Zfile.is_open()) + { + for (i = 0; i < count_kernels; i++) + { + if (getline(Zfile, line)) + s_Z_file[i] = stoi(line); + else + { + laerror("incomplete file with Z quantum numbers "); + return; + } + } + Zfile.close(); + } + else + { + laerror("file with Z quantum numbers not open"); + return; + } + } + // callling init function with loaded parameters, y and geometries data + Init(count_dimensions, count_kernels, count_geometries, count_levels, descriptor, kernel, s_param_file, s_geom_file, s_y_file, + s_Z_file); + } + + +template +void REGSURF::Init(const unsigned int count_dimensions, const unsigned int count_kernels, const unsigned int count_geometries, + const unsigned int count_levels, const string descriptor, const string kernel, const T* params, const T* geometries, + const T* values, const unsigned int* Z) + { + unsigned int i, j; + T min_value; + + s_kernel_type = kernel; + if (kernel != "Gaussian" and kernel != "Laplacian" and kernel != "Exponential" and kernel != "Matern") + { + laerror("invalid type of kernel"); + return; + } + if (kernel == "Gaussian") + s_kernel_id = 0; + if (kernel == "Laplacian") + s_kernel_id = 1; + if (kernel == 
"Exponential") + s_kernel_id = 2; + if (kernel == "Matern") + s_kernel_id = 3; + + s_descriptor_type = descriptor; + if (descriptor == "RE") + s_descriptor_id = 0; + if (descriptor == "Coulomb_matrix") + s_descriptor_id = 1; + if (descriptor == "Inverse_pairs") + s_descriptor_id = 2; + + s_count_kernels = count_kernels; + s_count_dimensions = count_dimensions; + s_count_geometries = count_geometries; + s_count_levels = count_levels; + + // copy parameters and data + { + s_lambda = new T[count_levels]; + s_alpha = new T[count_levels]; + if(!s_lambda || !s_alpha) laerror("allocation error in REGSURF::Init"); + } + for (i = 0; i < count_levels; i++) + s_lambda[i] = params[i]; + + for (i = count_levels; i < (2 * count_levels); i++) + { + s_alpha[i - count_levels] = params[i]; + if (s_alpha[i] > 1) + s_alpha[i] = 1; + if (s_alpha[i] < 0) + s_alpha[i] = 0; + } + s_sigma = params[(2 * count_levels)]; + if (kernel == "Matern") + s_n = static_cast(params[(2 * count_levels) + 1] + 0.5); + + s_aloc_init = false; + { // for loaded parameters + if (s_kernel_id == 3) + { + s_params = new T[(2 * count_levels) + 2]; + if(!s_params) laerror("allocation error in REGSURF::Init"); + s_Matern_comb_number = new T[s_n]; + if(!s_Matern_comb_number) laerror("allocation error in REGSURF::Init"); + for (i = 0; i < ((2 * count_levels) + 2); i++) + s_params[i] = params[i]; + } + else + { + s_params = new T[(2 * count_levels) + 1]; + if(!s_params) laerror("allocation error in REGSURF::Init"); + for (i = 0; i < ((2 * count_levels) + 1); i++) + s_params[i] = params[i]; + } + + } + // creating the list of numbers of geometries with minima of PES + for (i = 0; i < count_levels; i++) + { + min_value = values[i * count_geometries]; + s_y_min[i] = 0; + for (j = 1; j < count_geometries; j++) + if (values[(i * count_kernels) + j] < min_value) + { + min_value = values[(i * count_kernels) + j]; + s_y_min[i] = j; + } + } + // NRMat for input loaded geometries [s_count_dimensions * s_count_kernels, s_count_geometries] + s_geometries_mat = NRMat(geometries, count_dimensions * count_kernels, count_geometries); + // NRMat for vector of y of geomeries for surface [s_count_geometries, s_count_levels] + s_y_mat = NRMat(values, count_levels, count_geometries); + // NRVec for vector of Z for coulomb matrix descriptor [s_count_kernels] + if (Z != nullptr) + s_Z_vec = NRVec(Z, count_kernels); + + // NRMat for coordinate distances for geometries [s_count_kernels, s_count_kernels * s_count_geometries] + s_distances_mat = NRMat(count_kernels, count_kernels * count_geometries); + // NRMat for matrix of descriptors between kernels in geometry [s_count_kernels, s_count_kernels * s_count_geometries] + s_desc_mat = NRMat(count_kernels, count_kernels * count_geometries); + // NRVec of NRMat for matrix of kernel coefficients for kernels in geometries + // [s_count_kernels, s_count_kernels * s_count_geometries] + s_kernels_vec_mat = NRVec>(count_geometries); + for (i = 0; i < count_geometries; i++) + s_kernels_vec_mat[i] = NRMat(count_kernels, count_kernels); + + // NRVec for sums of kernels rows for base geometry [s_count_kernels] + s_kernels_rows_sums_vec_vec = NRVec>(count_geometries); + for (i = 0; i < count_geometries; i++) + s_kernels_rows_sums_vec_vec[i] = NRVec(count_kernels); + // NRVec of NRVec for differences of sums kernels rows between base and other geometries [s_count_kernels, s_count_geometries] + s_kernels_diff_vec_vec = NRVec>(count_geometries); + for (i = 0; i < count_geometries; i++) + s_kernels_diff_vec_vec[i] = 
NRVec(count_kernels); + // NRVec for differences of sums kernels rows and columns between base and other geometries [s_count_geometries] + s_kernels_diff_sums_vec = NRVec(count_geometries); + + // NRVec of NRVec for vector of weight coefficients of surface kernels in surface [s_count_kernels, s_count_levels] + s_weights_vec_vec = NRVec>(count_levels); + for (i = 0; i < count_levels; i++) + s_weights_vec_vec[i] = NRVec(count_kernels); + + // NRVec for vector of y of geomeries for surface [s_count_geometries] + s_y_vec = NRVec(s_y_mat.operator T*(), count_geometries); + // NRVec for vector of y predictions of geomeries for surface [s_count_geometries] + s_y_preds_vec = NRVec(count_geometries); + // NRVec for vector of delta y values for surface [s_count_geometries] + s_y_delta_vec = NRVec(count_geometries); + // NRVec for vector of temporary y values for surface [s_count_geometries] + s_y_temp_vec = NRVec(count_geometries); + // NRVec for vector of partial temporary y values for geometry [s_count_kernels] + s_y_temp_part_vec = NRVec(count_kernels); + + // NRMat for distances in geometry for predictions [s_count_kernels, s_count_kernels] + s_distances_pred_mat = NRMat(s_count_kernels, s_count_kernels); + // NRMat for matrix of descriptors between kernels in geometry for predictions [s_count_kernels, s_count_kernels] + s_desc_pred_mat = NRMat(s_count_kernels, s_count_kernels); + // NRVec of NRMat for matrix of kernel coefficients for kernels in geometry for predictions [count_kernels * count_kernels] + s_kernels_pred_vec_mat = NRVec>(1); + s_kernels_pred_vec_mat[0] = NRMat(s_count_kernels, s_count_kernels); + + // for loaded geometries and surfaces + s_geometries = s_geometries_mat.operator T*(); + s_y = s_y_mat.operator T*(); + // NRVec for vector of Z for coulomb matrix descriptor + s_Z = s_Z_vec.operator unsigned int*(); + // for computing weights coefficiensts of kernels and making predictions + s_distances = s_distances_mat.operator T*(); + s_desc = s_desc_mat.operator T*(); + s_distances = s_distances_mat.operator T*(); + s_kernels = s_kernels_vec_mat[0].operator T*(); + s_kernels_rows_sums = s_kernels_rows_sums_vec_vec[0].operator T*(); + s_kernels_diff = s_kernels_diff_vec_vec[0].operator T*(); + s_kernels_diff_sums = s_kernels_diff_sums_vec.operator T*(); + s_weights = s_weights_vec_vec[0].operator T*(); + + s_y_preds = s_y_preds_vec.operator T*(); + s_y_delta = s_y_delta_vec.operator T*(); + s_y_temp = s_y_temp_vec.operator T*(); + s_y_temp_part = s_y_temp_part_vec.operator T*(); + s_distances_pred = s_distances_pred_mat.operator T*(); + s_desc_pred = s_desc_pred_mat.operator T*(); + s_kernels_pred = s_kernels_pred_vec_mat[0].operator T*(); + s_aloc_init = true; + // calculating Matern coefficients, if Matern kernel is selected + if (s_kernel_id == 3) + Matern_coefficient(s_n, s_Matern_comb_number); + + s_base_init = true; + return; + } + + +template +T REGSURF::Kernel(const T* x, const unsigned int dimensions, const unsigned int kernel_id) + { + T result; + + switch (kernel_id) { + case 0: + result = Gaussian_kernel(x, dimensions, s_sigma); + break; + case 1: + result = Laplacian_kernel(x, dimensions, s_sigma); + break; + case 2: + result = Laplacian_kernel(x, dimensions, s_sigma); + break; + case 3: + result = Matern_kernel(x, dimensions, s_sigma, s_n, s_Matern_comb_number); + break; + } + return(result); + } + + + +template +unsigned int REGSURF::Build_s_distances_mat(const unsigned int count_geometries, const T* geometries, T* distances) + { + unsigned int i, j, k, l; + unsigned 
int count_dimensions, count_kernels; + T distance; + + count_dimensions = s_count_dimensions; + count_kernels = s_count_kernels; + // filling the coordinate distances matrix + if (s_base_init == true) + for (i = 0; i < count_geometries; i++) + for (j = 0; j < count_kernels; j++) + for (k = 0; k < count_kernels; k++) + { + distance = 0; + for (l = 0; l < count_dimensions; l++) + { + distance += std::pow(geometries[(i * count_kernels * count_dimensions) + (j * count_dimensions) + l] - + geometries[(i * count_kernels * count_dimensions) + (k * count_dimensions) + l], 2); + } + distances[(i * count_kernels * count_kernels) + (j * count_kernels) + k] = std::pow(distance, 0.5); + } + else + { + laerror("regression model not initialized"); + return(1); + } + return(0); + } + + +template +unsigned int REGSURF::Build_s_descriptor_mat(const unsigned int count_geometries, const T* distances, T* desc) + { + unsigned int i, j, k, l; + unsigned int count_kernels, count_dimensions; + T distance, distance_ref; + T descriptor; + + count_dimensions = s_count_dimensions; + count_kernels = s_count_kernels; + // filling the descriptor matrix + if (s_base_init == true) + { + // the RE descriptor + switch (s_descriptor_id) { + case 0: + Build_s_RE_descriptor_mat(s_count_kernels, count_geometries, distances, desc, s_distances); + break; + case 1: + Build_s_coulomb_matrix_descriptor_mat(s_count_kernels, count_geometries, distances, desc, s_Z); + break; + case 2: + Build_s_inverse_pairs_descriptor_mat(s_count_kernels, count_geometries, distances, desc); + break; + case 3: + extern_desc(s_count_kernels, count_geometries, distances, desc); + break; + laerror("unknown decsriptor identification number"); + return(1); + } + } + return(0); + } + + +template +unsigned int REGSURF::Build_s_kernels_mat(const unsigned int count_geometries, const T* desc, NRVec> &kernels_vec_mat) + { + unsigned int i, j, k; + unsigned int count_kernels; + T descriptor_difference; + T* matrix; + + count_kernels = s_count_kernels; + // create matrices of kernel coefficients for each geometry + for (i = 0; i < count_geometries; i++) + { + matrix = kernels_vec_mat[i].operator T*(); + // creating upper part of matrix of kernel coefficinnts and the diagonal + for (j = 0; j < count_kernels; j++) + for (k = j + 1; k < count_kernels; k++) + { + descriptor_difference = desc[(i * count_kernels * count_kernels) + (j * count_kernels) + k] - + s_desc[(j * count_kernels) + k]; + matrix[(j * count_kernels) + k] = Kernel(&descriptor_difference, 1, s_kernel_id); + } + // creating the diagonal + for (j = 0; j < count_kernels; j++) + { + if (desc[(i * count_kernels * count_kernels) + j * (count_kernels + 1)] != 0) + { + descriptor_difference = desc[(i * count_kernels * count_kernels) + j * (count_kernels + 1)] - + s_desc[(i * count_kernels * count_kernels) + j * (count_kernels + 1)]; + matrix[j * (count_kernels + 1)] = Kernel(&descriptor_difference, 1, s_kernel_id);; + } + else + matrix[j * (count_kernels + 1)] = 0; + } + // copying upper part of matrix down + for (j = 0; j < count_kernels; j++) + for (k = 0; k < j; k++) + matrix[(j * count_kernels) + k] = matrix[(k * count_kernels) + j]; + } + return(0); + } + + +template +unsigned int REGSURF::Build_s_kernels_mat_sums() + { + unsigned int i, j, k; + unsigned int count_kernels, count_geometries; + T sum; + T* matrix; + T* vector; + T* temp; + T* kernels_mat; + + count_geometries = s_count_geometries; + count_kernels = s_count_kernels; + temp = s_y_temp_vec.operator T*(); + + // create matrix of kernel sums 
differences for each kernel geometry + // fixed bug on rsum() function + for (i = 0; i < count_geometries; i++) + { + matrix = s_kernels_vec_mat[i].operator T*(); + vector = s_kernels_rows_sums_vec_vec[i].operator T*(); + for (j = 0; j < count_kernels; j++) + { + sum = 0; + for (k = 0; k < count_kernels; k++) + sum += matrix[(j * count_kernels) + k]; + + vector[j] = sum; + } + } + + for (i = 0; i < count_geometries; i++) + { + kernels_mat = s_kernels_vec_mat[i].operator T*(); + for (j = 0; j < count_kernels; j++) + { + sum = 0; + for (k = 0; k < count_kernels; k++) + sum += kernels_mat[(j * count_kernels) + k]; + + temp[j] = sum; + } + s_kernels_diff_vec_vec[i] |= s_kernels_rows_sums_vec_vec[0]; + s_kernels_diff_vec_vec[i] -= s_y_temp_vec; + } + // create vector of kernel sums differences for each geometry + for (i = 0; i < count_geometries; i++) + s_kernels_diff_sums_vec[i] = s_kernels_diff_vec_vec[i].sum(); + + // calculation of kernels matrix sum for base geometry + s_kernels_sum_sum = T(count_kernels * count_kernels); + return(0); + } + + +template +T REGSURF::s_RMSE(const unsigned int level) + { + unsigned int i; + T delta_square; + T Lasso_part; + T kernel_ridge_part; + T sum_kernels_base; + T y_pred_base; + T y_pred; + + // check the initialization + if (s_base_init == false) + { + laerror("regression model not initialized"); + return(1); + } + if (level >= s_count_levels) + { + laerror("level number out of model range"); + return(1); + } + + if (s_descriptor_id == 1) + sum_kernels_base = s_count_kernels * s_count_kernels; + else + sum_kernels_base = s_count_kernels * (s_count_kernels - 1.00); + + Lasso_part = s_lambda[level] * s_alpha[level]; + kernel_ridge_part = s_lambda[level] * (1 - s_alpha[level]) * 0.5 * s_count_kernels; + // predictions for all geometries + for (i = 0; i < s_count_geometries; i++) + { + s_y_temp_part_vec |= s_kernels_rows_sums_vec_vec[i]; + s_y_temp_part_vec *= s_weights_vec_vec[level]; + y_pred = s_y_temp_part_vec.sum(); + if (kernel_ridge_part != 0) + y_pred *= (sum_kernels_base - s_kernels_diff_sums_vec[i] + kernel_ridge_part)/ + (sum_kernels_base - s_kernels_diff_sums_vec[i]); + + y_pred += Lasso_part; + s_y_preds[i] = y_pred; + } + + s_y_delta_vec |= s_y_preds_vec; + // subtracting real values + s_y_delta_vec -= s_y_mat.row(level); + // the second power + s_y_delta_vec *= s_y_delta_vec; + // the sum + delta_square = s_y_delta_vec.sum(); + // calculating RMSE + delta_square = delta_square/T(s_count_geometries); + delta_square = std::sqrt(delta_square); + return(delta_square); + } + + +template +T REGSURF::Loss_Function(const unsigned int level) + { + unsigned int i; + unsigned int count_x; + T RMSE; + T KRR_part; + T Lasso_part; + // check the initialization + if (s_base_init == false) + { + laerror("regression model not initialized"); + return(1); + } + // calculating the root mean square error - RMSE + RMSE = s_RMSE(level); + return (RMSE); + } + + +template +unsigned int REGSURF::s_Fit(const T max_Loss, const unsigned int max_learning_cycles, const T learning_rate, +const bool element_invariant, unsigned int* kernel_classes) + { + unsigned int i, j, k, l, m; + unsigned int count_kernels, count_geometries, count_levels; + unsigned int element_count; + unsigned int* kernel_classes_index; + T init_weight; + T y_pred, y_pred_2, y_pred_3; + T y_diff; + T Loss; + T Lasso_part; + T kernel_ridge_part; + T element_sum; + T sum_kernels_base; + T y_pred_base; + + count_kernels = s_count_kernels; + count_geometries = s_count_geometries; + count_levels = 
s_count_levels; + if (kernel_classes == nullptr) + kernel_classes_index = s_Z; + else + kernel_classes_index = kernel_classes; + // check the initialization + if (s_base_init == false) + { + laerror("regression model not initialized"); + return(1); + } + // checking the input parameters + if (max_learning_cycles == 0) + { + laerror("zero learning cycles"); + return(1); + } + if (max_Loss < 0) + { + laerror("negative value of maximum Loss function"); + return(1); + } + // initialization of s_distances_mat and s_kernels_mat if necessary + if (s_distances_mat_init == false) + { + Build_s_distances_mat(s_count_geometries, s_geometries, s_distances); + s_distances_mat_init = true; + } + if (s_descriptors_mat_init == false) + { + Build_s_descriptor_mat(s_count_geometries, s_distances, s_desc); + s_descriptors_mat_init = true; + } + if (s_kernels_mat_init == false) + { + Build_s_kernels_mat(s_count_geometries, s_desc, s_kernels_vec_mat); + Build_s_kernels_mat_sums(); + s_kernels_mat_init = true; + } + sum_kernels_base = s_kernels_rows_sums_vec_vec[0].sum(); + // the training loop for all surfaces + for (i = 0; i < count_levels; i++) + { + Lasso_part = s_lambda[i] * s_alpha[i]; + kernel_ridge_part = s_lambda[i] * (1 - s_alpha[i]) * 0.5 * count_kernels; + // initialitation of weight coefficients vector + init_weight = (s_y[i] - Lasso_part)/(s_kernels_sum_sum + kernel_ridge_part); + for (j = 0; j < count_kernels; j++) + s_weights_vec_vec[i][j] = init_weight; + + for (j = 0; j < max_learning_cycles; j++) + { + for (k = 1; k < count_geometries; k++) + { + // get the model prediction for another geometry + s_y_temp_part_vec |= s_kernels_rows_sums_vec_vec[k]; + s_y_temp_part_vec *= s_weights_vec_vec[i]; + y_pred = s_y_temp_part_vec.sum(); + // calculating kernel ridge part of prediction + if (kernel_ridge_part != 0) + y_pred_2 = y_pred * kernel_ridge_part/(sum_kernels_base - s_kernels_diff_sums_vec[k]); + else + y_pred_2 = 0; + + y_pred_3 = y_pred + y_pred_2 + Lasso_part; + // calculating difference + y_diff = y_pred_3 - s_y[k + (i * count_geometries)]; + // calculating new weights + s_y_temp_part_vec |= s_kernels_diff_vec_vec[k]; + s_y_temp_part_vec /= s_kernels_diff_sums_vec[k]; + // optimalization of kernel ridge regression + if (kernel_ridge_part != 0) + { + s_y_temp_vec |= s_weights_vec_vec[i]; + s_y_temp_vec /= s_weights_vec_vec[i].sum()/y_pred_2; + s_y_temp_part_vec -= s_y_temp_vec; + } + if (element_invariant == true and kernel_classes_index != nullptr) + { + // for element ivariant kernels + for (l = 0; l < count_kernels; l++) + { + element_sum = 0; + element_count = 0; + for (m = 0; m < count_kernels; m++) + { + if (kernel_classes_index[m] == kernel_classes_index[l] and m < l) + break; + if (kernel_classes_index[m] == kernel_classes_index[l] and m >= l) + { + element_sum += s_y_temp_part[l]; + element_count += 1; + } + } + if (element_count > 1) + { + element_sum /= element_count; + s_y_temp_part[l] = element_sum; + for (m = l + 1; m < count_kernels; m++) + { + if (kernel_classes_index[m] == kernel_classes_index[l]) + s_y_temp_part[m] = element_sum; + } + } + } + } + s_y_temp_part_vec /= (T(count_geometries)/(learning_rate * y_diff)); + s_weights_vec_vec[i] -= s_y_temp_part_vec; + } + // convergence criteria check + if (max_Loss > 0) + { + Loss = Loss_Function(i); + if (Loss < max_Loss) + break; + } + } + } + return(0); + } + + +template +unsigned int REGSURF::s_Save_fitted(const string weights) + { + unsigned int i, j; + unsigned int count_kernels, count_levels; + ofstream 
weightsfile(weights); + string line; + + count_kernels = s_count_kernels; + count_levels = s_count_levels; + if (weightsfile.is_open()) + { + for (i = 0; i < count_levels; i++) + { + for (j = 0; j < count_kernels; j++) + { + line = to_string(s_weights_vec_vec[i][j]); + weightsfile << line << '\n'; + } + } + weightsfile.close(); + } + else + { + laerror("not open file for weights"); + return(1); + } + + return(0); + } + + +template +unsigned int REGSURF::s_Load_fitted(const string weights) + { + unsigned int i, j; + unsigned int count_kernels, count_levels; + ifstream weightsfile(weights); + string line; + T line_value; + + count_kernels = s_count_kernels; + count_levels = s_count_levels; + // initialization of REG_distances_mat and REG_kernels_mat if necessary + if (s_distances_mat_init == false) + { + Build_s_distances_mat(s_count_geometries, s_geometries, s_distances); + s_distances_mat_init = true; + } + if (s_descriptors_mat_init == false) + { + Build_s_descriptor_mat(s_count_geometries, s_distances, s_desc); + s_descriptors_mat_init = true; + } + if (s_kernels_mat_init == false) + { + Build_s_kernels_mat(s_count_geometries, s_desc, s_kernels_vec_mat); + Build_s_kernels_mat_sums(); + s_kernels_mat_init = true; + } + if (weightsfile.is_open()) + { + for (i = 0; i < count_levels; i++) + for (j = 0; j < count_kernels; j++) + { + if (getline(weightsfile, line)) + { + line_value = stod(line); + s_weights_vec_vec[i][j] = line_value; + } + else + { + laerror("not open file with weights"); + return(1); + } + } + weightsfile.close(); + } + return(0); + } + + +template +unsigned int REGSURF::s_Get_predictions(const unsigned int count_geometries, const unsigned int count_levels, +const T* geom, const unsigned int* surfaces, T* predictions) + { + unsigned int i, j, k; + unsigned int count_kernels; + T distance; + T Lasso_part; + T kernel_ridge_part; + T sum_kernels; + T pred_1, pred_2, pred_3; + + if (s_base_init == false) + { + laerror("regression model not inicialized"); + return(1); + } + for (i = 0; i < count_levels; i++) + if (surfaces[i] >= s_count_levels) + { + laerror("invalid number of surface level"); + return(1); + } + + count_kernels = s_count_kernels; + + for (i = 0; i < count_geometries; i++) + { + // building distances, descriptors and kernels matrices + Build_s_distances_mat(1, geom + (i * s_count_dimensions * s_count_kernels), s_distances_pred); + Build_s_descriptor_mat(1, s_distances_pred, s_desc_pred); + Build_s_kernels_mat(1, s_desc_pred, s_kernels_pred_vec_mat); + // replacing errored rsum() function + for (j = 0; j < count_kernels; j++) + { + sum_kernels = 0; + for (k = 0; k < count_kernels; k++) + sum_kernels += s_kernels_pred[(j * count_kernels) + k]; + + s_y_temp[j] = sum_kernels; + } + sum_kernels = s_y_temp_vec.sum(); + + for (j = 0; j < count_levels; j++) + { + Lasso_part = s_lambda[surfaces[j]] * s_alpha[surfaces[j]]; + kernel_ridge_part = s_lambda[surfaces[j]] * (1 - s_alpha[surfaces[j]]) * 0.5 * count_kernels; + // getting prediction + if (surfaces[j] < s_count_levels) + { + s_y_temp_part_vec |= s_y_temp_vec; + s_y_temp_part_vec *= s_weights_vec_vec[surfaces[j]]; + } + else + { + laerror("invalid number of surface level"); + return(1); + } + + pred_1 = s_y_temp_part_vec.sum(); + if (kernel_ridge_part != 0) + { + pred_2 = pred_1 * kernel_ridge_part/sum_kernels; + } + else + pred_2 = 0; + + pred_3 = Lasso_part; + predictions[(i * count_levels) + j] = pred_1 + pred_2 + pred_3; + } + } + return(0); + } + + +template +unsigned int REGSURF::s_Score(const unsigned int 
count_geometries, const unsigned int count_levels,
+const T* geom, const unsigned int* surfaces, T* predictions, T* y_values, T* score)
+    {
+    unsigned int i;
+    T RMSE = 0; // accumulate squared errors starting from zero
+
+    s_Get_predictions(count_geometries, count_levels, geom, surfaces, predictions);
+    for (i = 0; i < count_geometries * count_levels; i++)
+        RMSE += (predictions[i] - y_values[i]) * (predictions[i] - y_values[i]);
+
+    RMSE = RMSE/(count_geometries * count_levels);
+    RMSE = std::sqrt(RMSE);
+    score[0] = RMSE;
+    return(0);
+    }
+
+
+template <typename T>
+unsigned int REGSURF<T>::s_Set_params(const string descriptor, const string kernel, const T* params)
+    {
+    unsigned int i;
+    unsigned int count_levels;
+    unsigned int new_descriptor_id;
+    unsigned int new_kernel_id;
+
+    if (s_base_init == false)
+        {
+        laerror("regression model not initialized");
+        return(1);
+        }
+
+    count_levels = s_count_levels;
+    s_kernel_type = kernel;
+    if (kernel != "Gaussian" and kernel != "Laplacian" and kernel != "Exponential" and kernel != "Matern")
+        {
+        laerror("invalid type of kernel");
+        return(1);
+        }
+    if (kernel == "Gaussian")
+        new_kernel_id = 0;
+    if (kernel == "Laplacian")
+        new_kernel_id = 1;
+    if (kernel == "Exponential")
+        new_kernel_id = 2;
+    if (kernel == "Matern")
+        new_kernel_id = 3;
+
+    s_descriptor_type = descriptor;
+    if (descriptor == "RE")
+        new_descriptor_id = 0;
+    if (descriptor == "Coulomb_matrix")
+        new_descriptor_id = 1;
+    if (descriptor == "Inverse_pairs")
+        new_descriptor_id = 2;
+
+    if (new_kernel_id != s_kernel_id)
+        {
+        s_kernel_id = new_kernel_id;
+        s_kernels_mat_init = false;
+        }
+    if (new_descriptor_id != s_descriptor_id)
+        {
+        s_descriptor_id = new_descriptor_id;
+        s_kernels_mat_init = false;
+        s_descriptors_mat_init = false;
+        }
+
+    // copy parameters and data
+    for (i = 0; i < count_levels; i++)
+        s_lambda[i] = params[i];
+
+    for (i = count_levels; i < (2 * count_levels); i++)
+        {
+        s_alpha[i - count_levels] = params[i];
+        if (s_alpha[i - count_levels] > 1)
+            s_alpha[i - count_levels] = 1;
+        if (s_alpha[i - count_levels] < 0)
+            s_alpha[i - count_levels] = 0;
+        }
+    s_sigma = params[2 * count_levels];
+
+    if (s_kernel_id == 3)
+        for (i = 0; i < ((2 * count_levels) + 2); i++)
+            s_params[i] = params[i];
+    else
+        for (i = 0; i < ((2 * count_levels) + 1); i++)
+            s_params[i] = params[i];
+
+    if (s_kernel_id == 3)
+        {
+        s_n = static_cast<unsigned int>(params[(2 * count_levels) + 1] + 0.5);
+        if (s_Matern_comb_number != nullptr)
+            {
+            delete[] s_Matern_comb_number;
+            s_Matern_comb_number = nullptr;
+            }
+            {
+            s_Matern_comb_number = new T[s_n];
+            if(!s_Matern_comb_number) laerror("allocation error of s_Matern_comb_number");
+            Matern_coefficient(s_n, s_Matern_comb_number);
+            }
+        }
+    return(0);
+    }
+
+
+template <typename T>
+unsigned int REGSURF<T>::s_Get_params(string* descriptor, string* kernel, T* params)
+    {
+    unsigned int i;
+    unsigned int count_levels;
+
+    if (s_base_init == false)
+        {
+        laerror("regression model not initialized");
+        return(1);
+        }
+
+    count_levels = s_count_levels;
+    descriptor[0] = s_descriptor_type;
+    kernel[0] = s_kernel_type;
+
+    if (s_kernel_id != 3)
+        for (i = 0; i < ((2 * count_levels) + 1); i++)
+            params[i] = s_params[i];
+    else
+        for (i = 0; i < ((2 * count_levels) + 2); i++)
+            params[i] = s_params[i];
+
+    return(0);
+    }
+
+
+template <typename T>
+unsigned int REGSURF<T>::s_Set_extern_descriptor(unsigned int (*extern_func)(const unsigned int, const unsigned int, const T*, T*))
+    {
+    extern_desc = extern_func;
+    extern_descriptor = true;
+    s_descriptor_id = 3;
+    return(0);
+    }
+
+
+template <typename T>
+unsigned int REGSURF<T>::s_Get_weight_coeff(const unsigned int 
s_number, T* weights) + { + unsigned int i; + unsigned int count_kernels; + + count_kernels = s_count_kernels; + for (i = 0; i < count_kernels; i++) + weights[i] = s_weights_vec_vec[s_number][i]; + + return(0); + } + + +template +unsigned int REGSURF::s_Load_ML_NEA_geometries(const unsigned int count_geometries, const T* geometries) + { + ML_NEA_count_geometries = count_geometries; + s_ML_NEA_geometries_mat = NRMat(geometries, s_count_dimensions * s_count_kernels, count_geometries); + s_ML_NEA_geometries = s_ML_NEA_geometries_mat.operator T*(); + s_ML_NEA_input_init = true; + return(0); + } + + +template +unsigned int REGSURF::s_Load_ML_NEA_geometries(const unsigned int count_geometries, const string geometries_file) + { + unsigned int i, j, k, l; + unsigned int count_dimensions, count_kernels; + unsigned int line_size, numbers_size; + ifstream geomfile(geometries_file); + string line; + T line_value; + const string numbers = "+-0123456789.,"; + string readed_number; + bool read_switch; + bool decimal_switch; + + count_dimensions = s_count_dimensions; + count_kernels = s_count_kernels; + ML_NEA_count_geometries = count_geometries; + s_ML_NEA_geometries_mat = NRMat(s_count_dimensions * s_count_kernels, count_geometries); + s_ML_NEA_geometries = s_ML_NEA_geometries_mat.operator T*(); + + l = 0; + numbers_size = numbers.size(); + if (geomfile.is_open()) + { + for (i = 0; i < 3 * count_dimensions * count_kernels * count_geometries; i++) + { + if (getline(geomfile, line)) + { + read_switch = false; + decimal_switch = false; + readed_number = ""; + line_size = line.size(); + for (j = 0; j < line_size; j++) + { + read_switch = false; + for (k = 0; k < numbers_size; k++) + { + if (line[j] == numbers[k]) + { + readed_number.append(line, j, 1); + read_switch = true; + if (line[j] == numbers[12] or line[j] == numbers[13]) + decimal_switch = true; + } + } + if (read_switch == false) + { + if (decimal_switch == true and readed_number != "") + { + line_value = stod(readed_number); + s_ML_NEA_geometries[l] = line_value; + l++; + } + readed_number = ""; + read_switch = false; + decimal_switch = false; + } + } + if (decimal_switch == true and readed_number != "") + { + line_value = stod(readed_number); + s_ML_NEA_geometries[l] = line_value; + l++; + } + } + if (l == count_dimensions * count_kernels * count_geometries) + break; + } + geomfile.close(); + } + else + { + laerror("file with geometry is not open"); + return(1); + } + + if (l < count_dimensions * count_kernels * count_geometries) + { + laerror("input ML-NEA geometries not complete"); + return(1); + } + + s_ML_NEA_input_init = true; + return(0); + } + + +template +unsigned int REGSURF::s_Predict_ML_NEA_geometries() + { + unsigned int i; + T control_value; + // getting predistions + if (s_base_init == false) + { + laerror("regression model not initialized"); + return(1); + } + if (s_ML_NEA_input_init == false) + { + laerror("ML-NEA model input data not initialized"); + return(1); + } + + s_ML_NEA_surf_numbers_vec = NRVec(s_count_levels); + s_ML_NEA_surf_numbers = s_ML_NEA_surf_numbers_vec.operator unsigned int*(); + for (i = 0; i < s_count_levels; i++) + s_ML_NEA_surf_numbers[i] = i; + + s_ML_NEA_y_mat = NRMat(ML_NEA_count_geometries, s_count_levels); + s_ML_NEA_y = s_ML_NEA_y_mat.operator T*(); + control_value = s_Get_predictions(ML_NEA_count_geometries, s_count_levels, s_ML_NEA_geometries, s_ML_NEA_surf_numbers, s_ML_NEA_y); + if (control_value == 0) + s_ML_NEA_surf_init = true; + return(control_value); + } + + +template +unsigned int 
REGSURF<T>::s_Compute_ML_NEA_spectra(const T E_min, const T step, const unsigned int count_steps, const T delta)
+    { // sigma(E) = (e^2 * h)/(4 * m_e * c * E_0 * E) * sum_{n=1}^{N} (1/Np * sum_{i=1}^{Np} delta_E_0n(x_i) * f_0n(x_i) * f_g(E - delta_E_0n, delta))
+    // f_0n(x_i) = delta_E_min_n/delta_E_0n(x_i)
+    // f_g = 1/(sqrt(Phi) * delta) * exp(-((E - delta_E_0n)^2)/(0.5 * delta * delta))
+    unsigned int i, j, k;
+    unsigned int count_kernels, count_levels, count_geometries;
+    T y_min;
+    T h = 6.62607015E-34;       // Planck constant
+    T e = 1.602176634E-19;      // elementary charge
+    T m_e = 9.1093837E-31;      // electron mass
+    T c = 299792458;            // speed of light in vacuum
+    T E_0 = 8.8541878128E-12;   // vacuum permittivity
+    T Phi = (1.00 + std::sqrt(5))/2.00; // golden ratio used in the f_g prefactor
+    T pre_sum_part;
+    T E;
+    T delta_E_0n;
+    T f_0n;
+    T f_g;
+    T sigma_part;
+
+    count_kernels = s_count_kernels;
+    count_levels = s_count_levels;
+    count_geometries = ML_NEA_count_geometries;
+    if (s_base_init == false)
+        {
+        laerror("regression model not initialized");
+        return(1);
+        }
+    if (s_ML_NEA_input_init == false)
+        {
+        laerror("ML-NEA model input data not initialized");
+        return(1);
+        }
+
+    // calculating minima of surfaces
+    s_ML_NEA_min_y_vec = NRVec<T>(s_count_levels);
+    s_ML_NEA_min_y = s_ML_NEA_min_y_vec.operator T*();
+    for (i = 0; i < count_levels; i++)
+        {
+        y_min = s_ML_NEA_y[i * count_kernels];
+        for (j = 1; j < count_kernels; j++)
+            {
+            if (s_ML_NEA_y[(i * count_kernels) + j] < y_min)
+                y_min = s_ML_NEA_y[(i * count_kernels) + j];
+            }
+        s_ML_NEA_min_y[i] = y_min;
+        }
+    // calculating spectra
+    s_ML_NEA_spectra_vec = NRVec<T>(count_steps);
+    s_ML_NEA_spectra = s_ML_NEA_spectra_vec.operator T*();
+    pre_sum_part = (e * e * h)/(4 * m_e * c * E_0);
+    E = E_min;
+    for (i = 0; i < count_steps; i++)
+        s_ML_NEA_spectra[i] = 0;
+
+    for (i = 0; i < count_geometries; i++)
+        {
+        for (j = 0; j < count_levels; j++)
+            {
+            // calculating the oscillator strength
+            delta_E_0n = s_ML_NEA_y[(j * count_geometries) + i];
+            f_0n = s_ML_NEA_min_y[j]/delta_E_0n;
+            E = E_min;
+            if (delta_E_0n > 0 and f_0n > 0)
+                for (k = 0; k < count_steps; k++)
+                    {
+                    // calculating the gaussian function f_g partial intensity and adding it to the ML-NEA intensities vector
+                    if (std::abs(E - delta_E_0n)/delta < 100)
+                        {
+                        f_g = 1/(std::sqrt(Phi) * delta);
+                        f_g *= std::exp(-((E - delta_E_0n)*(E - delta_E_0n))/(0.5 * delta * delta));
+                        if (E > 0 and f_g > 0)
+                            {
+                            sigma_part = pre_sum_part/E * delta_E_0n * f_0n * f_g;
+                            s_ML_NEA_spectra[k] += sigma_part;
+                            }
+                        }
+                    E += step;
+                    }
+            }
+        }
+    s_ML_NEA_spectra_init = true;
+    return(0);
+    }
+
+
+template <typename T>
+unsigned int REGSURF<T>::s_Save_ML_NEA_spectra(const string ML_NEA_spectra_file)
+    {
+    unsigned int i;
+    unsigned int spectra_size;
+    T spectra_sum;
+    ofstream ML_NEA_file(ML_NEA_spectra_file);
+    string line;
+
+    spectra_size = s_ML_NEA_spectra_vec.size();
+    spectra_sum = s_ML_NEA_spectra_vec.sum();
+    if (ML_NEA_file.is_open())
+        {
+        for (i = 0; i < spectra_size; i++)
+            {
+            line = to_string(s_ML_NEA_spectra[i]/spectra_sum);
+            ML_NEA_file << line << '\n';
+            }
+        ML_NEA_file.close();
+        }
+    else
+        {
+        laerror("file for ML-NEA spectra not open");
+        return(1);
+        }
+
+    return(0);
+    }
+
+
+template <typename T>
+REGSURF<T>::~REGSURF()
+    {
+    // deallocating memory to avoid memory leaks
+    if (s_lambda != nullptr)
+        delete[] s_lambda;
+    if (s_alpha != nullptr)
+        delete[] s_alpha;
+    if (s_params != nullptr)
+        delete[] s_params;
+    if (s_Matern_comb_number != nullptr)
+        delete[] s_Matern_comb_number;
+    if (s_y_min != nullptr)
+        delete[] s_y_min;
+    if (s_geom_file != nullptr)
+        delete[] s_geom_file;
+    if (s_y_file != nullptr)
+        delete[] s_y_file;
+    if (s_Z_file != nullptr)
+        delete[] s_Z_file;
+    }
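+
+// Illustrative sketch (not part of the REGSURF interface): the conventional normalized
+// Gaussian line shape often used for NEA broadening, f_g(E) = 1/(delta*sqrt(2*pi)) *
+// exp(-(E - E_0n)^2/(2*delta^2)). The function name and this choice of normalization are
+// assumptions for illustration only; s_Compute_ML_NEA_spectra above uses its own
+// prefactor and relies on the division by the spectrum sum in s_Save_ML_NEA_spectra,
+// where constant prefactors cancel.
+template <typename T>
+T ML_NEA_gaussian_lineshape_example(const T E, const T E_0n, const T delta)
+    {
+    const T two_pi = 2 * 3.14159265358979323846;
+    const T diff = E - E_0n;
+    return std::exp(-(diff * diff)/(2 * delta * delta))/(delta * std::sqrt(two_pi));
+    }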
+} // end of namespace
+
+using namespace LA;
+
+//enforce instantiation
+
+
+template class REGSURF<double>;
diff --git a/regsurf.h b/regsurf.h
new file mode 100644
index 0000000..45f5550
--- /dev/null
+++ b/regsurf.h
@@ -0,0 +1,286 @@
+/*
+    LA: linear algebra C++ interface library
+    Kernel ridge regression module Copyright (C) 2024
+    Pavel Florian and Jiri Pittner or
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef REGSURF_H
+#define REGSURF_H
+
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <cmath>
+#include <cstdlib>
+
+using namespace std;
+
+#include "mat.h" // libla matrix implementation
+#include "vec.h" // libla vector implementation; compiler parameters: -llapack -lblas
+#include "reg.h"
+
+namespace LA {
+
+template <typename T>
+unsigned int Build_s_RE_descriptor_mat(const unsigned int count_particles, const unsigned int count_geometries, const T* distances, T* desc, const T* reference_geometry);
+
+template <typename T>
+unsigned int Build_s_coulomb_matrix_descriptor_mat(const unsigned int count_particles, const unsigned int count_geometries, const T* distances, T* desc, const unsigned int* Z);
+
+template <typename T>
+unsigned int Build_s_inverse_pairs_descriptor_mat(const unsigned int count_particles, const unsigned int count_geometries, const T* distances, T* desc);
+
+
+// The kernel regression of surfaces of geometries in 1D, 2D, 3D and multi-dimensional space
+// and computation of ML-NEA absorption spectra
+template <typename T>
+class REGSURF { // REG for potential energy surfaces (PES) and other quantity surfaces
+public:
+    // constructor of surface REG model from memory locations
+    REGSURF(const unsigned int count_dimensions, const unsigned int count_kernels, const unsigned int count_geometries,
+    const unsigned int count_levels, const string descriptor, const string kernel, const T* params, const T* geometries,
+    const T* values, const unsigned int* Z = nullptr);
+    // constructor of surface REG model from files
+    REGSURF(const string geometries_file, const string energies_file, const string parameters_file, const string Z_file = "");
+    // initialization function of the REG surface model
+    void Init(const unsigned int count_dimensions, const unsigned int count_kernels, const unsigned int count_geometries,
+    const unsigned int count_levels, const string descriptor, const string kernel, const T* params, const T* geometries,
+    const T* values, const unsigned int* Z);
+    // General kernel function
+    inline T Kernel(const T* x, const unsigned int dimensions, const unsigned int kernel_id);
+    // function for building the matrix of geometry distances between kernels
+    unsigned int Build_s_distances_mat(const unsigned int count_geometries, const T* geometries, T* distances);
+    // function for building the descriptor matrix
+    unsigned int Build_s_descriptor_mat(const unsigned int count_geometries, const T* distances, T* desc);
+    // Function that builds the matrix of coefficients for kernel coordinates
+    unsigned int Build_s_kernels_mat(const unsigned int count_geometries, const T* desc, NRVec<NRMat<T> > &kernels_vec_mat);
+    // Function that builds the sums 
of matrix coefficients for kernel coordinates + unsigned int Build_s_kernels_mat_sums(); + // Function, that return root mean square error of model + T s_RMSE(unsigned int level); // sqrt(sum(delta_y^2)/n) + // The Loss function + T Loss_Function(unsigned int level); // sqrt(sum(delta_y^2)/n) + lambda((1 - alpha)/2 * sum(weight^2) + alpha * sum(abs(weight))) + + unsigned int s_Fit(const T max_Loss, const unsigned int max_learning_cycles, const T learning_rate, + const bool element_invariant=true, unsigned int* kernel_classes=nullptr); + // Saving weight coefficients of regression surface model into file + unsigned int s_Save_fitted(const string weights); + // Loading weight coefficients of regression surface model from file + unsigned int s_Load_fitted(const string weights); + // This function is for getting predictions from kernel potential energy surface, geometry is loaded from memory + unsigned int s_Get_predictions(const unsigned int count_geometries, const unsigned int count_levels, + const T* geom, const unsigned int* surfaces, T* predictions); + // Function, that uses the regression surface to predict y for new data and investigate the model precision + unsigned int s_Score(const unsigned int count_geometries, const unsigned int count_levels, + const T* geom, const unsigned int* surfaces, T* predictions, T* y_values, T* score); + // Setting parameters of regression surface models + unsigned int s_Set_params(const string descriptor, const string kernel, const T* params); + // Getting parameters of regression surface models + unsigned int s_Get_params(string* descriptor, string* kernel, T* params); + // Setting the external descriptor function for computing the descriptor matrix + unsigned int s_Set_extern_descriptor(unsigned int (*extern_func)(const unsigned int, const unsigned int, const T*, T*)); + // Getting weight coefficients of regression surface model + unsigned int s_Get_weight_coeff(const unsigned int s_number, T* weights); + + // Loading geometries for ML-NEA calculations from memory + unsigned int s_Load_ML_NEA_geometries(const unsigned int count_geometries, const T* geometries); + // Loading geometries for ML-NEA calculations from file + unsigned int s_Load_ML_NEA_geometries(const unsigned int count_geometries, const string geometries_file); + // Calculation of energies for ML-NEA surfaces of geometries for all energy levels - ground and excited states + unsigned int s_Predict_ML_NEA_geometries(); + // Calculation of ML-NEA energy spectra + unsigned int s_Compute_ML_NEA_spectra(const T E_min, const T step, const unsigned int count_steps, const T delta); + // Saving the normalized ML-NEA energy spectra to file + unsigned int s_Save_ML_NEA_spectra(const string ML_NEA_spectra_file); + // destructor of surface REG model + ~REGSURF(); + +protected: + // string with setted type of kernels + string s_kernel_type; + // identification number of setted type of kernel + unsigned int s_kernel_id; + // string with selected type of descriptor for surface + string s_descriptor_type; + // identification number of selected type of descriptor + unsigned int s_descriptor_id; + + // dimension of kernels in surface geometry + unsigned int s_count_dimensions; + // count of kernels in surface geometry + unsigned int s_count_kernels; + // count of geometries in surface + unsigned int s_count_geometries; + // count of levels/surfaces + unsigned int s_count_levels; + // lambda parameter of regression + T* s_lambda = nullptr; + // alpha parameter of regression + T* s_alpha = nullptr; + // sigma 
parameter of kernels + T s_sigma; + // n for Matern kernels + unsigned int s_n; + // count of geometries for ML-NEA model + T ML_NEA_count_geometries; + // minimum of energy in begin of s_ML_NEA_spectra_vec (atomic units) + T ML_NEA_energy_min; + // the energy step in s_ML_NEA_spectra_vec (atomic units) + T ML_NEA_energy_step; + + // indicator of initialization of allocation of memory + bool s_aloc_init = false; + // indicator of initialization or initialization errors + bool s_base_init = false; + // indicator of initialization of s_distances_mat + bool s_distances_mat_init = false; + // indicator of initialization of s_descriptors_mat + bool s_descriptors_mat_init = false; + // indicator of initialization of s_kernels_mat + bool s_kernels_mat_init = false; + // indicator of using of extern descriptor functions + bool extern_descriptor = false; + // pointer to external descriptor function + unsigned int (*extern_desc)(const unsigned int, const unsigned int, const T*, T*) = nullptr; + + // indicator of loading of ML_NEA input data + bool s_ML_NEA_input_init = false; + // indicator of computing of ML_NEA surfaces + bool s_ML_NEA_surf_init = false; + // indicator of computing of ML_NEA spectra + bool s_ML_NEA_spectra_init = false; + + // the surface kernel parameters pointer, sigma parameter is first + T* s_params = nullptr; + // Matern kernel coefficient computed from n and k + T* s_Matern_comb_number = nullptr; + // numbers of geometries with minimum of y for levels [s_count_levels] + unsigned int* s_y_min = nullptr; + // The geometries data of each geometries [s_kernel_dimension * s_count_kernels * s_count_geometries] + T* s_geometries = nullptr; + // The surfaces y data values of each surfaces [s_count_geometries * s_count_surf] + T* s_y = nullptr; + // The surfaces Z proton numbers of atoms [s_count_kernels] + unsigned int* s_Z = nullptr; + // the sum of sums of kernel matrix for base geometry + T s_kernels_sum_sum; + + // matrix of coordinate distances between kernels for each geometry [s_count_kernels * s_count_kernels * s_count_geometries] + T* s_distances = nullptr; + // descriptor matrices for each geometry [s_kernel_dimension * s_kernel_dimension * s_count_geometries] + T* s_desc = nullptr; + // matrices of kernel coefficients for each surface kernel geometry [s_count_kernels * s_count_kernels] + T* s_kernels = nullptr; + // sums of kernels rows for geometries [s_count_kernels] + T* s_kernels_rows_sums; + // matrix of differences of sum kernel rows between geometries [s_count_kernels] + T* s_kernels_diff; + // vector differences of sums kernels rows and columns between base and other geometries [s_count_geometries] + T* s_kernels_diff_sums; + // vectors of weight coefficients for each surface kernel and surface [s_count_kernels] + T* s_weights = nullptr; + + // matrices of surface y energy/quantity predictions for each geometry and surface [s_count_geometries] + T* s_y_preds = nullptr; + // the surfaces difference y data values of each surfaces [s_count_geometries] + T* s_y_delta = nullptr; + // the surfaces temporary y data values of each surfaces [s_count_geometries] + T* s_y_temp = nullptr; + // the surfaces temporary y partial data values of each surfaces [count_kernels] + T* s_y_temp_part = nullptr; + // coordinate distances for geometry for predictions [s_count_kernels, s_count_kernels] + T* s_distances_pred; + // matrix of descriptors between kernels in geometry for predictions [s_count_kernels, s_count_kernels] + T* s_desc_pred; + // vector of matrix of kernel coefficients 
for kernels in geometry for predictions [count_kernels * count_kernels] + T* s_kernels_pred; + + // pointer to x buffer for model loaded from file + T* s_geom_file = nullptr; + // pointer to y buffer for model loaded from file + T* s_y_file = nullptr; + // pointer to parameters buffer for model loaded from file + T* s_param_file = nullptr; + // pointer to fitted data for model loaded from file + T* s_fit_file = nullptr; + // pointer to fitted data for model loaded from Z file + unsigned int* s_Z_file = nullptr; + // pointer to ML-NEA geometries buffer for model loaded from file + T* s_ML_NEA_geom_file = nullptr; + + // pointer to geometries loaded to ML_NEA model [s_count_dimensions * s_count_kernels, ML_NEA_count_geometries] + T* s_ML_NEA_geometries = nullptr; + // ML-NEA surfaces numbers + unsigned int* s_ML_NEA_surf_numbers; + // pointer to y values computed for ML_NEA model [ML_NEA_count_geometries, s_count_levels] + T* s_ML_NEA_y = nullptr; + // pointer to minimal y values in geometries computed for ML_NEA model [s_count_levels] + T* s_ML_NEA_min_y = nullptr; + // pointer to spectra computed for ML_NEA model [count_steps + 1] + T* s_ML_NEA_spectra = nullptr; + + // NRMat for input loaded geometries [s_count_dimensions * s_count_kernels, count_geometries] + NRMat s_geometries_mat; + // NRMat for vector of y of geomeries for surface [s_count_geometries, s_count_levels] + NRMat s_y_mat; + // NRVec for vector of Z for coulomb matrix descriptor [s_count_kernels] + NRVec s_Z_vec; + + // NRMat for coordinate distances for geometries [s_count_kernels, s_count_kernels * s_count_geometries] + NRMat s_distances_mat; + // NRMat for matrix of descriptors between kernels in geometry [s_count_kernels, s_count_kernels * s_count_geometries] + NRMat s_desc_mat; + // NRVec of NRMat for matrix of kernel coefficients for kernels in geometry + // [count_geometries, count_kernels * count_kernels] + NRVec> s_kernels_vec_mat; + // NRVec of NRVec for sums of kernels rows for geometries [s_count_kernels * s_count_geometries] + NRVec> s_kernels_rows_sums_vec_vec; + // NRVec of NRVec for differences of sums kernels rows between base and other geometries [s_count_kernels, s_count_geometries] + NRVec> s_kernels_diff_vec_vec; + // NRVec for differences of sums kernels rows and columns between base and other geometries [s_count_geometries] + NRVec s_kernels_diff_sums_vec; + // NRVec of NRVec for vectors of weight coefficients of surface kernels in surface [s_count_kernels, s_count_levels] + NRVec> s_weights_vec_vec; + // NRVec for vector of y of geomeries for surface [s_count_geometries] + NRVec s_y_vec; + + // NRVec for vector of y predictions of geomeries for surface [s_count_geometries] + NRVec s_y_preds_vec; + // NRVec for vector of delta y values for surface [s_count_geometries] + NRVec s_y_delta_vec; + // NRVec for vector of temporary y values for surface [s_count_geometries] + NRVec s_y_temp_vec; + // NRVec for vector of temporary partial y values for geometry [s_count_kernels] + NRVec s_y_temp_part_vec; + // NRMat for coordinate distances for geometry for predictions [s_count_kernels, s_count_kernels] + NRMat s_distances_pred_mat; + // NRMat for matrix of descriptors between kernels in geometry for predictions [s_count_kernels, s_count_kernels] + NRMat s_desc_pred_mat; + // NRVec of NRMat for matrix of kernel coefficients for kernels in geometry for predictions [count_kernels * count_kernels] + NRVec> s_kernels_pred_vec_mat; + + // NRMat for ML-NEA geometries [s_count_dimensions * s_count_kernels, 
ML_NEA_count_geometries] + NRMat s_ML_NEA_geometries_mat; + // NRVec for ML-NEA surfaces numbers + NRVec s_ML_NEA_surf_numbers_vec; + // NRMat for ML-NEA energies [s_count_levels, ML_NEA_count_geometries] + NRMat s_ML_NEA_y_mat; + // NRVec of minimal y values in geometries computed for ML_NEA model [s_count_levels] + NRVec s_ML_NEA_min_y_vec; + // NRVec for ML-NEA energy spectra [count_steps + 1] + NRVec s_ML_NEA_spectra_vec; +}; +} // end of namespace +# endif /* REG_H */ diff --git a/test_reg.cc b/test_reg.cc new file mode 100644 index 0000000..f9d0c4d --- /dev/null +++ b/test_reg.cc @@ -0,0 +1,131 @@ +/* + LA: linear algebra C++ interface library + Kernel ridge regression module Copyright (C) 2024 + Pavel Florian and Jiri Pittner or + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + + + +#include "reg.h" + +using namespace std; +using namespace LA; + + +int main(int argc, char *argv[]) +{ +// unsigned int i; + +double x; +double y; +bool end = false; +string user_input; +string model_x = "reg_test_x.csv"; +string model_y = "reg_test_y.csv"; +string model_p = "reg_test_p.csv"; +REG reg(model_x, model_y, model_p); +/* +double* reference_value; +double* score; +double RMSE; +double Loss; +double* params; +double* weights; +string kernel; + +reference_value = new double[1]; +score = new double[1]; +params = new double[4]; +weights = new double[451]; + +reference_value[0] = 1.00; +*/ +// creating model +// fitting model +reg.REG_Fit(0.0, 20); +//reg.REG_Fit_grad(0.1, 50, 0.2); +// get predictions + +cout << "Program for testing regression model \n" << endl; +/* +reg.REG_Get_weight_coeff(weights); +cout << "The weight coefficients are: " << kernel << endl; +for (i = 0; i < 451; i++) + cout << weights[i] << endl; +*/ +while (not end) + { + cout << "Please enter the x value or y to end \n" << endl; + cin >> user_input; + if (user_input == "y") + { + end = true; + break; + } + x = stod(user_input); + + reg.REG_Get_predictions(&x, &y, 1); + cout << "The model prediction is: " << y << endl; + /* + reg.REG_Get_Score(&x, &y, 1, reference_value, score); + cout << "The model prediction is: " << y << endl; + cout << "The reference value is: " << reference_value[0] << endl; + cout << "The score is: " << score[0] << endl; + + RMSE = reg.REG_RMSE(); + Loss = reg.Loss_Function(); + cout << "The RMSE is: " << RMSE << endl; + cout << "The Loss is: " << Loss << endl; + + reg.REG_Get_params(params, &kernel); + cout << "The parameters of model are: " << endl; + for (i = 0; i < 4; i++) + cout << "param: " << params[i] << endl; + + cout << "The type of kernel is: " << kernel << endl; + cout << endl; + + params[0] = 0; + kernel = "Gaussian"; + reg.REG_Set_params(params, kernel); + + reg.REG_Get_params(params, &kernel); + cout << "The new parameters o model are: " << RMSE << endl; + for (i = 0; i < 4; i++) + cout << "param: " << params[i] << endl; + + cout << "The new type of kernel is: " << kernel << endl; + cout << endl; + */ + } 
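+/*
+// A hedged sketch (not part of the original test): batch predictions for several
+// x values in one call. It assumes the model was built with one-dimensional x data,
+// as in the interactive loop above, and that the third argument of
+// REG_Get_predictions is the number of points to predict.
+double xs[5] = {0.5, 1.0, 1.5, 2.0, 2.5};
+double ys[5];
+reg.REG_Get_predictions(xs, ys, 5);
+for (unsigned int k = 0; k < 5; k++)
+    cout << "x = " << xs[k] << " -> predicted y = " << ys[k] << endl;
+*/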
+/*reg.REG_Save_fitted("reg_test_m.csv"); +reg.REG_Load_fitted("reg_test_m.csv"); +end = false; +while (not end) + { + cout << "Please enter the x value or y to end \n" << endl; + cin >> user_input; + if (user_input == "y") + { + end = true; + break; + } + x = stod(user_input); + reg.REG_Get_predictions(&x, &y, 1); + cout << "The model prediction is: " << y << endl; + }*/ +return(0); +}; diff --git a/test_regsurf.cc b/test_regsurf.cc new file mode 100644 index 0000000..2b6ea49 --- /dev/null +++ b/test_regsurf.cc @@ -0,0 +1,118 @@ +/* + LA: linear algebra C++ interface library + Kernel ridge regression module Copyright (C) 2024 + Pavel Florian and Jiri Pittner or + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + + +#include "regsurf.h" + +using namespace std; +using namespace LA; + + + +int main(int argc, char *argv[]) +{ +unsigned int i, j; +unsigned int count_levels; +unsigned int count_kernels; +double x; +double y; +double RMSE; +double Loss; +bool end = false; +string user_input; +string model_geoms = "xyz.dat"; +string model_y_values = "E_sum.dat"; +string model_params = "conf_surf.dat"; +string model_Z_file = "Z.dat"; +string model_test_geom = "nea_geoms.xyz"; +/* +double* weight_coeff; +double* params; +string model_weight_file; +string* descriptor; +string* kernel; +*/ +count_levels = 10; +count_kernels = 12;/* +descriptor = new string[1]; +kernel = new string[1]; + +weight_coeff = new double[count_kernels * count_levels]; +params = new double[22]; +model_weight_file = "weights.dat"; +*/ +REGSURF regsurf(model_geoms, model_y_values, model_params, model_Z_file); +// creating model +// fitting model +regsurf.s_Fit(0.0, 100, 0.5); +//reg.REG_Fit_grad(0.1, 50, 0.2); + +// get predictions + +cout << "Program for testing regression model \n" << endl; +cout << "The root mean square errors: " << endl; +for (i = 0; i < count_levels; i++) + { + RMSE = regsurf.s_RMSE(i); + cout << RMSE << endl; + } +cout << "The values of Loss_function: " << endl; +for (i = 0; i < count_levels; i++) + { + Loss = regsurf.Loss_Function(i); + cout << Loss << endl; + } +/* +cout << "The weights of kernels: " << endl; +for (i = 0; i < count_levels; i++) + { + cout << "Level " << i << ": "; + regsurf.s_Get_weight_coeff(i, weight_coeff + (i * count_kernels)); + for (j = 0; j < count_kernels; j++) + cout << weight_coeff[(i * count_kernels) + j] << endl; + } + +regsurf.s_Save_fitted(model_weight_file); +regsurf.s_Load_fitted(model_weight_file); +regsurf.s_Get_params(descriptor, kernel, params); + +cout << "The descriptor is: " << descriptor[0] << endl; +cout << "The kernel is: " << kernel[0] << endl; +cout << "The parameters of model are: "; +for (i = 0; i < (2 * count_levels) + 1; i++) + cout << params[i] << endl; + +descriptor[0] = "Inverse_pairs"; +kernel[0] = "Laplacian"; +params[1] = 0.2; +regsurf.s_Set_params(descriptor[0], kernel[0], params); + +regsurf.s_Get_params(descriptor, kernel, params); +cout << "The new descriptor is: " << descriptor[0] << endl; +cout 
<< "The new kernel is: " << kernel[0] << endl; +cout << "The new parameters of model are: "; +for (i = 0; i < (2 * count_levels) + 1; i++) + cout << params[i] << endl; +*/ +regsurf.s_Load_ML_NEA_geometries(50000, "nea_geoms.xyz"); +regsurf.s_Predict_ML_NEA_geometries(); +regsurf.s_Compute_ML_NEA_spectra(0.00, 0.0025, 4000, 0.2); +regsurf.s_Save_ML_NEA_spectra("spectra.dat"); +return(0); +};