included kernel ridge regression module

This commit is contained in:
Jiri Pittner 2024-02-14 15:33:25 +01:00
parent 7172e26a50
commit c0ce70fbc0
8 changed files with 3111 additions and 3 deletions

2
.gitignore vendored
View File

@ -54,6 +54,8 @@ t
test
fi
tX
test_reg
test_regsurf
*.tar.bz2
.*.swp
# CVS default ignores end

View File

@ -1,12 +1,14 @@
lib_LTLIBRARIES = libla.la
include_HEADERS = version.h simple.h vecmat3.h quaternion.h fortran.h cuda_la.h auxstorage.h davidson.h laerror.h mat.h qsort.h vec.h bisection.h diis.h la.h noncblas.h smat.h numbers.h bitvector.h fourindex.h la_traits.h la_random.h nonclass.h sparsemat.h sparsesmat.h csrmat.h conjgrad.h gmres.h matexp.h permutation.h polynomial.h contfrac.h graph.h
libla_la_SOURCES = simple.cc quaternion.cc vecmat3.cc vec.cc mat.cc smat.cc sparsemat.cc sparsesmat.cc csrmat.cc laerror.cc noncblas.cc numbers.cc bitvector.cc strassen.cc nonclass.cc cuda_la.cc fourindex.cc permutation.cc polynomial.cc contfrac.cc graph.cc la_random.cc
include_HEADERS = version.h simple.h vecmat3.h quaternion.h fortran.h cuda_la.h auxstorage.h davidson.h laerror.h mat.h qsort.h vec.h bisection.h diis.h la.h noncblas.h smat.h numbers.h bitvector.h fourindex.h la_traits.h la_random.h nonclass.h sparsemat.h sparsesmat.h csrmat.h conjgrad.h gmres.h matexp.h permutation.h polynomial.h contfrac.h graph.h reg.h regsurf.h
libla_la_SOURCES = simple.cc quaternion.cc vecmat3.cc vec.cc mat.cc smat.cc sparsemat.cc sparsesmat.cc csrmat.cc laerror.cc noncblas.cc numbers.cc bitvector.cc strassen.cc nonclass.cc cuda_la.cc fourindex.cc permutation.cc polynomial.cc contfrac.cc graph.cc la_random.cc reg.cc regsurf.cc
nodist_libla_la_SOURCES = version.cc
check_PROGRAMS = t test tX
check_PROGRAMS = t test tX test_reg test_regsurf
t_SOURCES = t.cc t2.cc
#nodist_t_SOURCES =
test_SOURCES = test.cc
tX_SOURCES = tX.cc
test_reg_SOURCES = test_reg.cc
test_regsurf_SOURCES = test_regsurf.cc
LDADD = .libs/libla.a
ACLOCAL_AMFLAGS = -I m4
EXTRA_DIST = get_git_version LICENSE doxygen.cfg aminclude.am acinclude.m4 footer.html

851
reg.cc Normal file
View File

@ -0,0 +1,851 @@
/*
LA: linear algebra C++ interface library
Kernel ridge regression module Copyright (C) 2024
Pavel Florian <florian43@seznam.cz> and Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "reg.h"
namespace LA {
template <typename T>
T Gaussian_kernel(const T* x, const unsigned int dimensions, const T sigma)
{
unsigned int i;
T result = 0;
for (i = 0; i < dimensions; i++)
result = result + (x[i] * x[i]);
result = std::exp(-result/(2 * sigma * sigma));
return(result);
}
template <typename T>
T Laplacian_kernel(const T* x, const unsigned int dimensions, const T sigma)
{
unsigned int i;
T result = 0;
for (i = 0; i < dimensions; i++)
result = result + abs(x[i]);
result = std::exp(-abs(result)/(sigma));
return(result);
}
template <typename T>
T Exponential_kernel(const T* x, const unsigned int dimensions, const T sigma)
{
unsigned int i;
T result = 0;
for (i = 0; i < dimensions; i++)
result = result + (x[i] * x[i]);
result = std::exp(-std::pow(result, 0.5)/sigma);
return(result);
}
template <typename T>
T Matern_kernel(const T* x, const unsigned int dimensions, const T sigma, const unsigned int n,
const T* Matern_comb_number)
{
unsigned int i;
unsigned int k;
T part_1 = 0; // sum(x^2)^1/2
T exponential_part;
T Matern_part = 0;
T result;
for (i = 0; i < dimensions; i++)
part_1 = part_1 + (x[i] * x[i]);
part_1 = std::pow(part_1, 0.5);
exponential_part = std::exp(-part_1/(sigma));
for (k = 0; k < n; k++)
Matern_part += Matern_comb_number[i] * std::pow(2 * part_1/sigma, n - k);
result = exponential_part * Matern_part;
return(result);
}
template <typename T>
unsigned int Matern_coefficient(const unsigned int n, T* comb_number) // values of (n + k)!/(2 n)! * n!/(k!(n-k)!)
{
unsigned int i , j;
unsigned int k;
unsigned int part_1; // reverse value of (n + k)!/(2 n)!
unsigned int part_2; // n!/(n-k)!
unsigned int part_3; // k!
for (k = 0; k < n; k++)
{
part_1 = 1;
part_2 = 1;
part_3 = 1;
for (i = n + k + 1; i <= 2 * n; i++)
part_1 = part_1 * i;
for (i = n - k + 1; i <= n; i++)
part_2 = part_2 * i;
for (i = 1; i < k; i++)
part_3 = part_3 * i;
comb_number[k] = T(part_2)/T(part_1 * part_3);
}
return(0);
}
template <typename T>
REG<T>::REG(const T* x, const T* y, const T* params, const unsigned int data_size, const unsigned int x_dimensions, const string kernel)
{
Init(x, y , params, data_size, x_dimensions, kernel);
}
template <typename T>
REG<T>::REG(const string x_file, const string y_file, const string param_file)
{
unsigned int i;
ifstream xfile(x_file);
ifstream yfile(y_file);
ifstream paramfile(param_file);
string line;
T line_value;
// parameters of model
unsigned int data_size;
unsigned int x_dimensions;
string kernel;
REG_param_file = new T[4];
if(!REG_param_file) laerror("allocation failure in REG constructor");
// loading parameters for regression model from param_file
if (paramfile.is_open())
{
if (getline(paramfile, line))
data_size = stoi(line);
else
return;
if (getline(paramfile, line))
x_dimensions = stoi(line);
else
return;
if (getline(paramfile, line))
kernel = line;
else
return;
if (getline(paramfile, line))
REG_param_file[0] = stod(line);
else
return;
if (getline(paramfile, line))
REG_param_file[1] = stod(line);
else
return;
if (getline(paramfile, line))
REG_param_file[2] = stod(line);
else
return;
if (kernel == "Matern")
{
if (getline(paramfile, line))
REG_param_file[3] = static_cast<unsigned int>(stod(line) + 0.5);
else
return;
}
}
REG_x_file = new T[data_size * x_dimensions];
REG_y_file = new T[data_size];
if(!REG_x_file || !REG_y_file) laerror("allocation failure 2 in REG constructor");
// loading x and y data for regression model from files
if (xfile.is_open() and yfile.is_open())
{
for (i = 0; i < data_size * x_dimensions; i++)
{
if (getline(xfile, line))
{
line_value = stod(line);
REG_x_file[i] = line_value;
}
else
return;
}
for (i = 0; i < data_size; i++)
{
if (getline(yfile, line))
{
line_value = stod(line);
REG_y_file[i] = line_value;
}
else
return;
}
xfile.close();
yfile.close();
paramfile.close();
}
else
return;
Init(REG_x_file, REG_y_file, REG_param_file, data_size, x_dimensions, kernel);
return;
}
template <typename T>
void REG<T>::Init(const T* x, const T* y, const T* params, const unsigned int data_size, const unsigned int x_dimensions,
const string kernel)
{
unsigned int i;
// Setting type of kernel
REG_kernel_type = kernel;
if (kernel != "Gaussian" and kernel != "Laplacian" and kernel != "Exponential" and kernel != "Matern")
return;
if (kernel == "Gaussian")
REG_kernel_id = 0;
if (kernel == "Laplacian")
REG_kernel_id = 1;
if (kernel == "Exponential")
REG_kernel_id = 2;
if (kernel == "Matern")
REG_kernel_id = 3;
// copy parameters and data
REG_lambda= params[0];
REG_alpha = params[1];
if (REG_alpha > 1)
REG_alpha = 1;
if (REG_alpha < 0)
REG_alpha = 0;
REG_sigma = params[2];
if (kernel == "Matern")
REG_n = static_cast<unsigned int>(params[3] + 0.5);
REG_count_kernels = data_size;
REG_count_dimensions = x_dimensions;
REG_aloc_init = false;
{
REG_params = new T[4];
if(!REG_params) laerror("allocation failure in REG::Init");
if (REG_kernel_id == 3)
{
for (i = 0; i < 4; i++)
REG_params[i] = params[i];
REG_Matern_comb_number_vec = NRVec<T>(REG_n);
REG_Matern_comb_number = REG_Matern_comb_number_vec.operator T*();
}
else
{
for (i = 0; i < 3; i++)
REG_params[i] = params[i];
REG_Matern_comb_number_vec = NRVec<T>(1);
REG_Matern_comb_number = REG_Matern_comb_number_vec.operator T*();
}
}
// copy data
REG_coordinates_vec = NRVec<T>(x, data_size * REG_count_dimensions);
REG_coordinates = REG_coordinates_vec.operator T*();
REG_y_vec = NRVec<T>(y , data_size);
REG_Y = REG_y_vec.operator T*();
// for fitting model
REG_distances_mat = NRMat<T>(data_size, data_size);
REG_distances = REG_distances_mat.operator T*();
REG_kernels_mat = NRMat<T>(data_size, data_size);
REG_kernels = REG_kernels_mat.operator T*();
REG_kernels_sums_vec = NRVec<T>(data_size);
REG_kernels_sums = REG_kernels_sums_vec.operator T*();
REG_weights_vec = NRVec<T>(data_size);
REG_weights = REG_weights_vec.operator T*();
// for predictions
REG_coordinates_diff_vec = NRVec<T>(data_size);
REG_coordinates_diff = REG_coordinates_diff_vec.operator T*();
REG_kernels_diff_vec = NRVec<T>(data_size);
REG_kernels_diff = REG_kernels_diff_vec.operator T*();
REG_y_preds_vec = NRVec<T>(data_size);
REG_y_preds = REG_y_preds_vec.operator T*();
REG_y_delta_vec = NRVec<T>(data_size);
REG_y_delta = REG_y_delta_vec.operator T*();
REG_y_temp_vec = NRVec<T>(data_size);
REG_y_temp = REG_y_temp_vec.operator T*();
REG_aloc_init = true;
// calculating Matern coefficients, if Matern kernel is selected
if (REG_kernel_id == 3)
Matern_coefficient(REG_n, REG_Matern_comb_number);
REG_base_init = true;
return;
}
template <typename T>
T REG<T>::Kernel(const T* x, const unsigned int dimensions, const unsigned int kernel_id)
{
T result;
switch (kernel_id) {
case 0:
result = Gaussian_kernel(x, dimensions, REG_sigma);
break;
case 1:
result = Laplacian_kernel(x, dimensions, REG_sigma);
break;
case 2:
result = Laplacian_kernel(x, dimensions, REG_sigma);
break;
case 3:
result = Matern_kernel(x, dimensions, REG_sigma, REG_n, REG_Matern_comb_number);
break;
}
return(result);
}
template <typename T>
unsigned int REG<T>::Build_REG_distances_mat()
{
unsigned int i, j, k;
unsigned int mat_dimension, vec_dimension;
// check the initialization
if (REG_base_init == false)
{
laerror("regression model not inicialized");
return(1);
}
mat_dimension = REG_count_kernels;
vec_dimension = REG_count_dimensions;
// building the matrix of vectors coordinates differences between kernels
for (i = 0; i < mat_dimension; i++)
{
for (j = i + 1; j < mat_dimension; j++)
for (k = 0; k < vec_dimension; k++)
{
REG_distances[(i * (mat_dimension) + j) * vec_dimension + k] = REG_coordinates[i * vec_dimension + k] -
REG_coordinates[j * vec_dimension + k];
}
}
// copy calculated values under diagonal
for (i = 0; i < mat_dimension; i++)
{
for (j = i + 1; j < mat_dimension; j++)
for (k = 0; k < vec_dimension; k++)
{
REG_distances[(j * (mat_dimension) + i) * vec_dimension + k] =
REG_distances[(i * (mat_dimension) + j) * vec_dimension + k];
}
}
// copy 0 to diagonal
for (i = 0; i < mat_dimension; i++)
for (k = 0; k < vec_dimension; k++)
REG_distances[(i * (mat_dimension + 1) * vec_dimension) + k] = 0;
return(0);
}
template <typename T>
unsigned int REG<T>::Build_REG_kernels_mat()
{
unsigned int i, j, k;
unsigned int mat_dimension, vec_dimension;
// check the initialization
if (REG_base_init == false)
{
laerror("regression model not inicialized");
return(1);
}
// copy class parameters
mat_dimension = REG_count_kernels;
vec_dimension = REG_count_dimensions;
// crete kernel coefficients matrix for gaussian kernels
for (i = 0; i < mat_dimension; i++)
for (j = i + 1; j < mat_dimension; j++)
{
REG_kernels[i * mat_dimension + j] = Kernel(&REG_distances[(i * (mat_dimension) + j) * vec_dimension],
vec_dimension, REG_kernel_id);
}
// copy calculated values under diagonal
for (i = 0; i < mat_dimension; i++)
for (j = i + 1; j < mat_dimension; j++)
REG_kernels[(j * (mat_dimension) + i)] = REG_kernels[(i * (mat_dimension) + j)];
// copy 1 to diagonal
for (i = 0; i < mat_dimension; i++)
REG_kernels[i * (mat_dimension + 1)] = 1;
return(0);
}
template <typename T>
unsigned int REG<T>::Build_REG_kernels_mat_sums()
{
unsigned int i, j;
unsigned int count_kernels;
T sum;
count_kernels = REG_count_kernels;
for (i = 0; i < count_kernels; i++)
{
sum = 0;
for (j = 0; j < count_kernels; j++)
sum += REG_kernels[(i * count_kernels) + j];
REG_kernels_sums[i] = sum;
}
return(0);
}
template <typename T>
T REG<T>::REG_RMSE()
{
T delta_square;
// check the initialization
if (REG_base_init == false)
{
laerror("regression model not inicialized");
return(1);
}
T y_max = REG_y_vec.amax();
T y_min = REG_y_vec.amin();
REG_y_delta_vec |= REG_y_preds_vec;
REG_y_delta_vec -= REG_y_vec;
REG_y_delta_vec *= REG_y_delta_vec;
delta_square = REG_y_delta_vec.sum();
delta_square = delta_square/REG_count_kernels;
delta_square = std::sqrt(delta_square);
return(delta_square);
}
template <typename T>
T REG<T>::Loss_Function()
{
unsigned int count_x;
T RMSE;
// check the initialization
if (REG_base_init == false)
{
laerror("regression model not inicialized");
return(1);
}
// calculating the root mean square error - RMSE
RMSE = REG_RMSE();
return (RMSE);
}
template <typename T>
unsigned int REG<T>::REG_Fit(const T max_Loss, const unsigned int max_learning_cycles)
{ // the kernel ridge learning algorithm
unsigned int i, j;
T Loss;
T Lasso_part;
T kernel_ridge_part;
unsigned int count_kernels;
// check the initialization
if (REG_base_init == false)
{
laerror("regression model not inicialized");
return(1);
}
// checking the input parameters
if (max_learning_cycles == 0)
{
laerror("zero learning cycles");
return(1);
}
if (max_Loss < 0)
{
laerror("negative value of maximum Loss function");
return(1);
}
Lasso_part = REG_lambda * REG_alpha;
kernel_ridge_part = REG_lambda * (1 - REG_alpha) * 0.5;
count_kernels = REG_count_kernels;
// initialization of REG_distances_mat and REG_kernels_mat if necessary
if (REG_mat_init == false)
{
Build_REG_distances_mat();
Build_REG_kernels_mat();
Build_REG_kernels_mat_sums();
REG_mat_init = true;
}
// initialitation of weight coefficients vector
REG_weights_vec |= REG_y_vec;
// Linear learning cycle of weight coefficient for learning
for (i = 0; i < max_learning_cycles; i++)
{
// calculating preds_vec vector of predicted y values
REG_y_preds_vec.gemv(0, REG_kernels_mat, 'n', 1, REG_weights_vec);
if (kernel_ridge_part != 0)
for (j = 0; j < REG_count_kernels; j++)
REG_y_preds[j] *= (REG_kernels_sums[j] + kernel_ridge_part)/REG_kernels_sums[j];
if (Lasso_part != 0)
REG_y_preds_vec += Lasso_part;
// calculating new weights
REG_y_temp_vec |= REG_y_vec;
REG_y_temp_vec /= REG_y_preds_vec;
REG_weights_vec *= REG_y_temp_vec;
// checking the Loss_Function value
if (max_Loss > 0)
{
Loss = Loss_Function();
if (Loss < max_Loss)
break;
}
}
return(0);
}
template <typename T>
unsigned int REG<T>::REG_Fit_grad(const T max_Loss, const unsigned int max_learning_cycles, const T learning_rate)
{
unsigned int i, j, k;
unsigned int count_x;
T Loss;
T delta_RMSE;
// for optimizing calculation of Kernel ridge part of Loss function difference
T kernel_ridge_part;
T delta_KRR_part;
// for optimizing calculation of Lasso part of Loss function difference
T Lasso_part;
T delta_Lasso_part;
T sqrt_count_x;
count_x = REG_count_kernels;
// check the initialization
if (REG_base_init == false)
{
laerror("regression model not inicialized");
return(1);
}
// Checking the input parameters
if (max_learning_cycles == 0)
{
laerror("zero learning cycles");
return(1);
}
if (max_Loss < 0)
{
laerror("negative value of maximum Loss function");
return(1);
}
// initialization of REG_distances_mat and REG_kernels_mat if necessary
if (REG_mat_init == false)
{
Build_REG_distances_mat();
Build_REG_kernels_mat();
Build_REG_kernels_mat_sums();
REG_mat_init = true;
}
// initialitation of weight coefficients vector
REG_weights_vec |= REG_y_vec;
kernel_ridge_part = REG_lambda * (1.00 - REG_alpha);
Lasso_part = REG_lambda * REG_alpha;
sqrt_count_x = std::sqrt(count_x);
// gradient descent Learning cycle of weight coefficient for learning
for (i = 0; i < max_learning_cycles; i++)
{
// calculating preds_vec vector of predicted y values
REG_y_preds_vec.gemv(0, REG_kernels_mat, 'n', 1, REG_weights_vec);
if (kernel_ridge_part != 0)
for (j = 0; j < count_x; j++)
REG_y_preds[j] *= (REG_kernels_sums[j] + kernel_ridge_part)/REG_kernels_sums[j];
if (Lasso_part != 0)
REG_y_preds_vec += Lasso_part;
// calculating difference of root mean square error - RMSE
for (j = 0; j < count_x; j++)
{
delta_RMSE = 0;
for (k = 0; k < count_x; k++)
delta_RMSE += (REG_y_preds[k] - REG_Y[k]) * REG_kernels[j * count_x + k];
delta_RMSE /=sqrt_count_x;
// calculating new weights
REG_weights[j] /= (1 - learning_rate * delta_RMSE);
}
// convergence criteria check
if (max_Loss > 0)
{
Loss = Loss_Function();
if (Loss < max_Loss)
break;
}
}
return(0);
}
template <typename T>
unsigned int REG<T>::REG_Save_fitted(const string model_file)
{
unsigned int i;
ofstream modelfile(model_file);
string line;
// checking the initialization
if (REG_base_init == false)
{
laerror("regression model not inicialized");
return(1);
}
// save model to file
if (modelfile.is_open())
for (i = 0; i < REG_count_kernels; i++)
{
line = to_string(REG_weights[i]);
modelfile << line << '\n';
}
modelfile.close();
return(0);
}
template <typename T>
unsigned int REG<T>::REG_Load_fitted(const string model_file)
{
unsigned int i;
ifstream modelfile(model_file);
string line;
T line_value;
// check the initialization
if (REG_base_init == false)
{
laerror("regression model not inicialized");
return(1);
}
// initialization of REG_distances_mat and REG_kernels_mat if necessary
if (REG_mat_init == false)
{
Build_REG_distances_mat();
Build_REG_kernels_mat();
REG_mat_init = true;
}
// load stored weights
if (modelfile.is_open())
{
for (i = 0; i < REG_count_kernels; i++)
if (getline(modelfile, line))
{
line_value = stod(line);
REG_weights[i] = line_value;
}
REG_y_preds_vec.gemv(0, REG_kernels_mat, 'n', 1, REG_weights_vec);
}
return(0);
}
template <typename T>
unsigned int REG<T>::REG_Get_predictions(const T* x, T* y_preds, const unsigned int data_size)
{
unsigned int i, j, k;
T pred_value;
T Lasso_part;
T kernel_ridge_part;
T sum_kernels_diff;
T weights_sum;
// check the initialization
if (REG_base_init == false)
{
laerror("regression model not inicialized");
return(1);
}
Lasso_part = REG_lambda * REG_alpha;
kernel_ridge_part = REG_lambda * (1 - REG_alpha) * 0.5;
// loop for predicting new values for each x
for (i = 0; i < data_size; i++) // For every x value
{
pred_value = 0;
// calculating the REG_coordinates_vec
for (j = 0; j < REG_count_kernels; j++) // For every kernel
{
for (k = 0; k < REG_count_dimensions; k++) // For every x coordinate
{
REG_coordinates_diff[(j * REG_count_dimensions) + k] = x[(i * REG_count_dimensions) + k] -
REG_coordinates[(j * REG_count_dimensions) + k];
}
}
// calculating the REG_kernels_vec
if (kernel_ridge_part != 0)
{
sum_kernels_diff = 0;
for (j = 0; j < REG_count_kernels; j++) // For every kernel
{
REG_kernels_diff[j] = Kernel(&REG_coordinates_diff[(j * REG_count_dimensions)], REG_count_dimensions, REG_kernel_id);
pred_value += REG_kernels_diff[j] * REG_weights[j];
sum_kernels_diff += REG_kernels_diff[j];
}
if (sum_kernels_diff != 0)
pred_value *= (sum_kernels_diff + kernel_ridge_part)/sum_kernels_diff;
}
else
for (j = 0; j < REG_count_kernels; j++) // For every kernel
{
REG_kernels_diff[j] = Kernel(&REG_coordinates_diff[(j * REG_count_dimensions)], REG_count_dimensions, REG_kernel_id);
pred_value += REG_kernels_diff[j] * REG_weights[j];
}
pred_value += Lasso_part;
// storing y prediction into memory
y_preds[i] = pred_value;
}
return(0);
}
template <typename T>
unsigned int REG<T>::REG_Get_Score(const T* x, T* y_preds, const unsigned int data_size, const T* y, T* score)
{
unsigned int i;
T prediction;
// check the initialization
if (REG_base_init == false)
{
laerror("regression model not inicialized");
return(1);
}
REG_Get_predictions(x, y_preds, data_size);
prediction = 0;
for (i = 0; i < data_size; i++)
prediction += (y[i] - y_preds[i]) * (y[i] - y_preds[i]);
score[0] = prediction;
return(0);
}
template <typename T>
unsigned int REG<T>::REG_Set_params(const T* params, const string kernel)
{
// setting type of kernel
REG_kernel_type = kernel;
if (kernel == "Gaussian")
REG_kernel_id = 0;
if (kernel == "Laplacian")
REG_kernel_id = 1;
if (kernel == "Exponential")
REG_kernel_id = 2;
if (kernel == "Matern")
REG_kernel_id = 3;
// copy parameters and data
REG_lambda = params[0];
REG_alpha = params[1];
if (REG_alpha > 1)
REG_alpha = 1;
if (REG_alpha < 0)
REG_alpha = 0;
REG_sigma = params[2];
if (kernel == "Matern")
REG_n = static_cast<unsigned int>(params[3] + 0.5);
// calculating Matern coefficients, if Matern kernel is selected
if (REG_kernel_id == 3)
{
REG_Matern_comb_number_vec.resize(REG_n);
REG_Matern_comb_number = REG_Matern_comb_number_vec.operator T*();
Matern_coefficient(REG_n, REG_Matern_comb_number);
}
return(0);
}
template <typename T>
unsigned int REG<T>::REG_Get_params(T* params, string* kernel)
{
kernel[0] = REG_kernel_type;
params[0] = REG_lambda;
params[1] = REG_alpha;
params[2] = REG_sigma;
if (REG_kernel_type == "Mattern")
params[3] = REG_n;
return(0);
}
template <typename T>
unsigned int REG<T>::REG_Get_weight_coeff(T* weight)
{
unsigned int i;
for (i = 0; i < REG_count_kernels; i++)
weight[i] = REG_weights[i];
return(0);
}
template <typename T>
REG<T>::~REG()
{
// dealocating memory to avoid memory leaks
if (REG_params != nullptr)
delete[] REG_params;
if (REG_x_file != nullptr)
delete[] REG_x_file;
if (REG_y_file != nullptr)
delete[] REG_y_file;
if (REG_param_file != nullptr)
delete[] REG_param_file;
if (REG_fit_file != nullptr)
delete[] REG_fit_file;
}
//enforce instantiation
#define INSTANTIZE(T) \
template T Gaussian_kernel(const T* x, const unsigned int dimensions, const T sigma); \
template T Laplacian_kernel(const T* x, const unsigned int dimensions, const T sigma); \
template T Exponential_kernel(const T* x, const unsigned int dimensions, const T sigma); \
template T Matern_kernel(const T* x, const unsigned int dimensions, const T sigma, const unsigned int n, const T* Matern_comb_number); \
template unsigned int Matern_coefficient(const unsigned int n, T* comb_number); \
INSTANTIZE(double)
} // end of namespace
template class REG<double>;

192
reg.h Normal file
View File

@ -0,0 +1,192 @@
/*
LA: linear algebra C++ interface library
Kernel ridge regression module Copyright (C) 2024
Pavel Florian <florian43@seznam.cz> and Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <iostream>
#include <string>
#include <cmath>
#include <bits/stdc++.h>
#include <fstream>
using namespace std;
# include "mat.h" // vector support libla implementation
# include "vec.h" // compiler parameters: -llapack -lblas
# include "la.h"
# ifndef REG_H
# define REG_H
namespace LA {
template <typename T>
T Gaussian_kernel(const T* x, const unsigned int dimensions, const T sigma);
template <typename T>
T Laplacian_kernel(const T* x, const unsigned int dimensions, const T sigma);
template <typename T>
T Exponential_kernel(const T* x, const unsigned int dimensions, const T sigma);
template <typename T>
T Matern_kernel(const T* x, const unsigned int dimensions, const T sigma, const unsigned int n, const T* Matern_comb_number);
template <typename T>
unsigned int Matern_coefficient(const unsigned int n, T* comb_number);
// The kernel regression of points in 1D (curve), 2D (surface), 3D and multi-D space
template <typename T>
class REG { // Kernel regression (REG) library
public:
// Constructor of kernel regression model from memory location
REG(const T* x, const T* y, const T* params, const unsigned int data_size, const unsigned int x_dimensions, const string kernel);
// Constructor of kernel regression model from files
REG(const string x_file, const string y_file, const string param_file);
// Init function for constructor
void Init(const T* x, const T* y, const T* params, const unsigned int data_size, const unsigned int x_dimensions, const string kernel);
// General kernel function
inline T Kernel(const T* x, const unsigned int dimensions, const unsigned int kernel_id);
// Function, that build matrix of distances between kernels
unsigned int Build_REG_distances_mat();
// Function, that build matrix of coefficients for kernel coordinates
unsigned int Build_REG_kernels_mat();
// Function, that build sums of matrix coefficients for kernel coordinates
unsigned int Build_REG_kernels_mat_sums();
// Function, that return root mean square error of model
T REG_RMSE(); // sum(delta_y)
// The Loss function
T Loss_Function(); // sum(delta_y) + lambda((1 - alpha)/2 * sum(weight^2) + alpha * sum(abs(weight)))
// Function, that learn kernels and set weights
unsigned int REG_Fit(const T max_Loss, const unsigned int max_learning_cycles);
// Function, that learn kernels and set weights by gradient descent method
unsigned int REG_Fit_grad(const T max_Loss, const unsigned int max_learning_cycles, const T learning_rate);
// Function, that store fitted parameters of regression model to file
unsigned int REG_Save_fitted(const string model_file);
// Function, that load fitted parameters of model to file
unsigned int REG_Load_fitted(const string model_file);
// Function, that uses the kernel regression model to predict y for new data
unsigned int REG_Get_predictions(const T* x, T* y_preds, const unsigned int data_size);
// Function, that uses the kernel regression model to predict y for new data and investigate the model precision
unsigned int REG_Get_Score(const T* x, T* y_preds, const unsigned int data_size, const T* y, T* score);
// Setting parameters of kernel regression model
unsigned int REG_Set_params(const T* params, const string kernel);
// Getting parameters of kernel regression model
unsigned int REG_Get_params(T* params, string* kernel);
// Getting weight coefficients of kernels in regression model
unsigned int REG_Get_weight_coeff(T* weight);
// destructor of kernel regression model
~REG();
private:
// string with type of kernel
string REG_kernel_type;
// number of type of kernel
unsigned int REG_kernel_id;
// Count of x items (dimensions) for kernels
unsigned int REG_count_dimensions;
// number of input x values/kernels
unsigned int REG_count_kernels;
// lambda parameter of regression
T REG_lambda;
// alpha parameter of regression
T REG_alpha;
// sigma parameter of kernels
T REG_sigma;
// n for Matern kernels
unsigned int REG_n;
// indicator of initialization or initialization errors
bool REG_base_init = false;
// indicator of initialization of allocation of memory
bool REG_aloc_init = false;
// indicator of initialization of REG_REG_distances_mat and REG_REG_kernels_mat
bool REG_mat_init = false;
// Matern kernel coefficient computed from n and k
T* REG_Matern_comb_number = nullptr;
// pointer to parameters of kernel regression, various size, first is lambda, second alpha, third sigma parameter of kernels
// and fourth the n parameter, if is used Matern kernel
T* REG_params = nullptr;
// variable which contains number of learning cycles of model
unsigned int learning_cycles = 0;
// x values of kernel regression input data
T* REG_coordinates = nullptr;
// y values of kernel regression input data
T* REG_Y = nullptr;
// matrix of distances between kernels
T* REG_distances = nullptr;
// matrix of kernel coefficients for kernel coordinates
T* REG_kernels = nullptr;
// vector of row sums of kernel coefficients for kernel coordinates
T* REG_kernels_sums = nullptr;
// vector of weight coefficients for each kernel
T* REG_weights = nullptr;
// vector of distances between kernels
T* REG_coordinates_diff = nullptr;
// vector of kernel coefficients for kernel coordinates
T* REG_kernels_diff = nullptr;
// vector of y predictions for each kernel
T* REG_y_preds = nullptr;
// vector of differences between y values and predictions for each kernel
T* REG_y_delta = nullptr;
// vector of temporary results for processing y values and predictions for each kernel
T* REG_y_temp = nullptr;
// pointer to x buffer for model loaded from file
T* REG_x_file = nullptr;
// pointer to y buffer for model loaded from file
T* REG_y_file = nullptr;
// pointer to parameters buffer for model loaded from file
T* REG_param_file = nullptr;
// pointer to fitted data for model loaded from file
T* REG_fit_file = nullptr;
// NRVec for Matern kernel coefficient computed from n and k
NRVec<T> REG_Matern_comb_number_vec;
// NRVec for matrix of distances between kernels
NRVec<T> REG_coordinates_vec;
// NRVec for vector of y imput values of kernels
NRVec<T> REG_y_vec;
// NRMat for matrix of distances between kernels
NRMat<T> REG_distances_mat;
// NRMat for matrix of kernel coefficients for kernel coordinates
NRMat<T> REG_kernels_mat;
// NRVec for vector of row sums of kernel coefficients for kernel coordinates
NRVec<T> REG_kernels_sums_vec;
// NRVec for vector of weight coefficients of kernels
NRVec<T> REG_weights_vec;
// NRVec for matrix of coordinates differences between kernels
NRVec<T> REG_coordinates_diff_vec;
// NRVec for matrix of kernel coefficients for kernel coordinates
NRVec<T> REG_kernels_diff_vec;
// NRVec for vector of y predictions of kernels
NRVec<T> REG_y_preds_vec;
// NRVec for vector of differences between estimated real and real y values for training kernels
NRVec<T> REG_y_delta_vec;
// NRVec for temporary results for processing y values and predictions for each kernel
NRVec<T> REG_y_temp_vec;
};
// Include the template classes definitions
} // end of namespace
# endif /* REG_H */

1526
regsurf.cc Normal file

File diff suppressed because it is too large Load Diff

286
regsurf.h Normal file
View File

@ -0,0 +1,286 @@
/*
LA: linear algebra C++ interface library
Kernel ridge regression module Copyright (C) 2024
Pavel Florian <florian43@seznam.cz> and Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef REGSURF_H
#define REGSURF_H
#include <iostream>
#include <string>
#include <cmath>
#include <bits/stdc++.h>
#include <fstream>
using namespace std;
# include "mat.h" // vector support libla implementation
# include "vec.h" // compiler parameters: -llapack -lblas
# include "reg.h"
namespace LA {
template <typename T>
unsigned int Build_s_RE_descriptor_mat(const unsigned int count_particles, const unsigned int count_geometries, const T* distances, T* desc, const T* reference_geometry);
template <typename T>
unsigned int Build_s_coulomb_matrix_descriptor_mat(const unsigned int count_particles, const unsigned int count_geometries, const T* distances, T* desc, const unsigned int* Z);
template <typename T>
unsigned int Build_s_inverse_pairs_descriptor_mat(const unsigned int count_particles, const unsigned int count_geometries, const T* distances, T* desc);
// The kernel regression of surfaces of geometries in in 1D, 2D, 3D and multi-D space
// and computing of ML-NEA absorption spectra
template <typename T>
class REGSURF { // REG for potential energy surfaces (PES) and other quantities surfaces
public:
// constructor of surface REG model from memory locations
REGSURF(const unsigned int count_dimensions, const unsigned int count_kernels, const unsigned int count_geometries,
const unsigned int count_levels, const string descriptor, const string kernel, const T* params, const T* geometries,
const T* values, const unsigned int* Z = nullptr);
// constructor of surface REG model from files
REGSURF(const string geometries_file, const string energies_file, const string parameters_file, const string Z_file = "");
// inicialization function of REG surface model
void Init(const unsigned int count_dimensions, const unsigned int count_kernels, const unsigned int count_geometries,
const unsigned int count_levels, const string descriptor, const string kernel, const T* params, const T* geometries,
const T* values, const unsigned int* Z);
// General kernel function
inline T Kernel(const T* x, const unsigned int dimensions, const unsigned int kernel_id);
// function for building matrix of geometry distances between kernels
unsigned int Build_s_distances_mat(const unsigned int count_geometries, const T* geometries, T* distances);
// function for building descriptor matrix
unsigned int Build_s_descriptor_mat(const unsigned int count_geometries, const T* distances, T* desc);
// Function, that build matrix of coefficients for kernel coordinates
unsigned int Build_s_kernels_mat(const unsigned int count_geometries, const T* desc, NRVec<NRMat<T>> &kernels_vec_mat);
// Function, that build sums of matrix coefficients for kernel coordinates
unsigned int Build_s_kernels_mat_sums();
// Function, that return root mean square error of model
T s_RMSE(unsigned int level); // sqrt(sum(delta_y^2)/n)
// The Loss function
T Loss_Function(unsigned int level); // sqrt(sum(delta_y^2)/n) + lambda((1 - alpha)/2 * sum(weight^2) + alpha * sum(abs(weight)))
unsigned int s_Fit(const T max_Loss, const unsigned int max_learning_cycles, const T learning_rate,
const bool element_invariant=true, unsigned int* kernel_classes=nullptr);
// Saving weight coefficients of regression surface model into file
unsigned int s_Save_fitted(const string weights);
// Loading weight coefficients of regression surface model from file
unsigned int s_Load_fitted(const string weights);
// This function is for getting predictions from kernel potential energy surface, geometry is loaded from memory
unsigned int s_Get_predictions(const unsigned int count_geometries, const unsigned int count_levels,
const T* geom, const unsigned int* surfaces, T* predictions);
// Function, that uses the regression surface to predict y for new data and investigate the model precision
unsigned int s_Score(const unsigned int count_geometries, const unsigned int count_levels,
const T* geom, const unsigned int* surfaces, T* predictions, T* y_values, T* score);
// Setting parameters of regression surface models
unsigned int s_Set_params(const string descriptor, const string kernel, const T* params);
// Getting parameters of regression surface models
unsigned int s_Get_params(string* descriptor, string* kernel, T* params);
// Setting the external descriptor function for computing the descriptor matrix
unsigned int s_Set_extern_descriptor(unsigned int (*extern_func)(const unsigned int, const unsigned int, const T*, T*));
// Getting weight coefficients of regression surface model
unsigned int s_Get_weight_coeff(const unsigned int s_number, T* weights);
// Loading geometries for ML-NEA calculations from memory
unsigned int s_Load_ML_NEA_geometries(const unsigned int count_geometries, const T* geometries);
// Loading geometries for ML-NEA calculations from file
unsigned int s_Load_ML_NEA_geometries(const unsigned int count_geometries, const string geometries_file);
// Calculation of energies for ML-NEA surfaces of geometries for all energy levels - ground and excited states
unsigned int s_Predict_ML_NEA_geometries();
// Calculation of ML-NEA energy spectra
unsigned int s_Compute_ML_NEA_spectra(const T E_min, const T step, const unsigned int count_steps, const T delta);
// Saving the normalized ML-NEA energy spectra to file
unsigned int s_Save_ML_NEA_spectra(const string ML_NEA_spectra_file);
// destructor of surface REG model
~REGSURF();
protected:
// string with setted type of kernels
string s_kernel_type;
// identification number of setted type of kernel
unsigned int s_kernel_id;
// string with selected type of descriptor for surface
string s_descriptor_type;
// identification number of selected type of descriptor
unsigned int s_descriptor_id;
// dimension of kernels in surface geometry
unsigned int s_count_dimensions;
// count of kernels in surface geometry
unsigned int s_count_kernels;
// count of geometries in surface
unsigned int s_count_geometries;
// count of levels/surfaces
unsigned int s_count_levels;
// lambda parameter of regression
T* s_lambda = nullptr;
// alpha parameter of regression
T* s_alpha = nullptr;
// sigma parameter of kernels
T s_sigma;
// n for Matern kernels
unsigned int s_n;
// count of geometries for ML-NEA model
T ML_NEA_count_geometries;
// minimum of energy in begin of s_ML_NEA_spectra_vec (atomic units)
T ML_NEA_energy_min;
// the energy step in s_ML_NEA_spectra_vec (atomic units)
T ML_NEA_energy_step;
// indicator of initialization of allocation of memory
bool s_aloc_init = false;
// indicator of initialization or initialization errors
bool s_base_init = false;
// indicator of initialization of s_distances_mat
bool s_distances_mat_init = false;
// indicator of initialization of s_descriptors_mat
bool s_descriptors_mat_init = false;
// indicator of initialization of s_kernels_mat
bool s_kernels_mat_init = false;
// indicator of using of extern descriptor functions
bool extern_descriptor = false;
// pointer to external descriptor function
unsigned int (*extern_desc)(const unsigned int, const unsigned int, const T*, T*) = nullptr;
// indicator of loading of ML_NEA input data
bool s_ML_NEA_input_init = false;
// indicator of computing of ML_NEA surfaces
bool s_ML_NEA_surf_init = false;
// indicator of computing of ML_NEA spectra
bool s_ML_NEA_spectra_init = false;
// the surface kernel parameters pointer, sigma parameter is first
T* s_params = nullptr;
// Matern kernel coefficient computed from n and k
T* s_Matern_comb_number = nullptr;
// numbers of geometries with minimum of y for levels [s_count_levels]
unsigned int* s_y_min = nullptr;
// The geometries data of each geometries [s_kernel_dimension * s_count_kernels * s_count_geometries]
T* s_geometries = nullptr;
// The surfaces y data values of each surfaces [s_count_geometries * s_count_surf]
T* s_y = nullptr;
// The surfaces Z proton numbers of atoms [s_count_kernels]
unsigned int* s_Z = nullptr;
// the sum of sums of kernel matrix for base geometry
T s_kernels_sum_sum;
// matrix of coordinate distances between kernels for each geometry [s_count_kernels * s_count_kernels * s_count_geometries]
T* s_distances = nullptr;
// descriptor matrices for each geometry [s_kernel_dimension * s_kernel_dimension * s_count_geometries]
T* s_desc = nullptr;
// matrices of kernel coefficients for each surface kernel geometry [s_count_kernels * s_count_kernels]
T* s_kernels = nullptr;
// sums of kernels rows for geometries [s_count_kernels]
T* s_kernels_rows_sums;
// matrix of differences of sum kernel rows between geometries [s_count_kernels]
T* s_kernels_diff;
// vector differences of sums kernels rows and columns between base and other geometries [s_count_geometries]
T* s_kernels_diff_sums;
// vectors of weight coefficients for each surface kernel and surface [s_count_kernels]
T* s_weights = nullptr;
// matrices of surface y energy/quantity predictions for each geometry and surface [s_count_geometries]
T* s_y_preds = nullptr;
// the surfaces difference y data values of each surfaces [s_count_geometries]
T* s_y_delta = nullptr;
// the surfaces temporary y data values of each surfaces [s_count_geometries]
T* s_y_temp = nullptr;
// the surfaces temporary y partial data values of each surfaces [count_kernels]
T* s_y_temp_part = nullptr;
// coordinate distances for geometry for predictions [s_count_kernels, s_count_kernels]
T* s_distances_pred;
// matrix of descriptors between kernels in geometry for predictions [s_count_kernels, s_count_kernels]
T* s_desc_pred;
// vector of matrix of kernel coefficients for kernels in geometry for predictions [count_kernels * count_kernels]
T* s_kernels_pred;
// pointer to x buffer for model loaded from file
T* s_geom_file = nullptr;
// pointer to y buffer for model loaded from file
T* s_y_file = nullptr;
// pointer to parameters buffer for model loaded from file
T* s_param_file = nullptr;
// pointer to fitted data for model loaded from file
T* s_fit_file = nullptr;
// pointer to fitted data for model loaded from Z file
unsigned int* s_Z_file = nullptr;
// pointer to ML-NEA geometries buffer for model loaded from file
T* s_ML_NEA_geom_file = nullptr;
// pointer to geometries loaded to ML_NEA model [s_count_dimensions * s_count_kernels, ML_NEA_count_geometries]
T* s_ML_NEA_geometries = nullptr;
// ML-NEA surfaces numbers
unsigned int* s_ML_NEA_surf_numbers;
// pointer to y values computed for ML_NEA model [ML_NEA_count_geometries, s_count_levels]
T* s_ML_NEA_y = nullptr;
// pointer to minimal y values in geometries computed for ML_NEA model [s_count_levels]
T* s_ML_NEA_min_y = nullptr;
// pointer to spectra computed for ML_NEA model [count_steps + 1]
T* s_ML_NEA_spectra = nullptr;
// NRMat for input loaded geometries [s_count_dimensions * s_count_kernels, count_geometries]
NRMat<T> s_geometries_mat;
// NRMat for vector of y of geomeries for surface [s_count_geometries, s_count_levels]
NRMat<T> s_y_mat;
// NRVec for vector of Z for coulomb matrix descriptor [s_count_kernels]
NRVec<unsigned int> s_Z_vec;
// NRMat for coordinate distances for geometries [s_count_kernels, s_count_kernels * s_count_geometries]
NRMat<T> s_distances_mat;
// NRMat for matrix of descriptors between kernels in geometry [s_count_kernels, s_count_kernels * s_count_geometries]
NRMat<T> s_desc_mat;
// NRVec of NRMat for matrix of kernel coefficients for kernels in geometry
// [count_geometries, count_kernels * count_kernels]
NRVec<NRMat<T>> s_kernels_vec_mat;
// NRVec of NRVec for sums of kernels rows for geometries [s_count_kernels * s_count_geometries]
NRVec<NRVec<T>> s_kernels_rows_sums_vec_vec;
// NRVec of NRVec for differences of sums kernels rows between base and other geometries [s_count_kernels, s_count_geometries]
NRVec<NRVec<T>> s_kernels_diff_vec_vec;
// NRVec for differences of sums kernels rows and columns between base and other geometries [s_count_geometries]
NRVec<T> s_kernels_diff_sums_vec;
// NRVec of NRVec for vectors of weight coefficients of surface kernels in surface [s_count_kernels, s_count_levels]
NRVec<NRVec<T>> s_weights_vec_vec;
// NRVec for vector of y of geomeries for surface [s_count_geometries]
NRVec<T> s_y_vec;
// NRVec for vector of y predictions of geomeries for surface [s_count_geometries]
NRVec<T> s_y_preds_vec;
// NRVec for vector of delta y values for surface [s_count_geometries]
NRVec<T> s_y_delta_vec;
// NRVec for vector of temporary y values for surface [s_count_geometries]
NRVec<T> s_y_temp_vec;
// NRVec for vector of temporary partial y values for geometry [s_count_kernels]
NRVec<T> s_y_temp_part_vec;
// NRMat for coordinate distances for geometry for predictions [s_count_kernels, s_count_kernels]
NRMat<T> s_distances_pred_mat;
// NRMat for matrix of descriptors between kernels in geometry for predictions [s_count_kernels, s_count_kernels]
NRMat<T> s_desc_pred_mat;
// NRVec of NRMat for matrix of kernel coefficients for kernels in geometry for predictions [count_kernels * count_kernels]
NRVec<NRMat<T>> s_kernels_pred_vec_mat;
// NRMat for ML-NEA geometries [s_count_dimensions * s_count_kernels, ML_NEA_count_geometries]
NRMat<T> s_ML_NEA_geometries_mat;
// NRVec for ML-NEA surfaces numbers
NRVec<unsigned int> s_ML_NEA_surf_numbers_vec;
// NRMat for ML-NEA energies [s_count_levels, ML_NEA_count_geometries]
NRMat<T> s_ML_NEA_y_mat;
// NRVec of minimal y values in geometries computed for ML_NEA model [s_count_levels]
NRVec<T> s_ML_NEA_min_y_vec;
// NRVec for ML-NEA energy spectra [count_steps + 1]
NRVec<T> s_ML_NEA_spectra_vec;
};
} // end of namespace
# endif /* REG_H */

131
test_reg.cc Normal file
View File

@ -0,0 +1,131 @@
/*
LA: linear algebra C++ interface library
Kernel ridge regression module Copyright (C) 2024
Pavel Florian <florian43@seznam.cz> and Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "reg.h"
using namespace std;
using namespace LA;
int main(int argc, char *argv[])
{
// unsigned int i;
double x;
double y;
bool end = false;
string user_input;
string model_x = "reg_test_x.csv";
string model_y = "reg_test_y.csv";
string model_p = "reg_test_p.csv";
REG<double> reg(model_x, model_y, model_p);
/*
double* reference_value;
double* score;
double RMSE;
double Loss;
double* params;
double* weights;
string kernel;
reference_value = new double[1];
score = new double[1];
params = new double[4];
weights = new double[451];
reference_value[0] = 1.00;
*/
// creating model
// fitting model
reg.REG_Fit(0.0, 20);
//reg.REG_Fit_grad(0.1, 50, 0.2);
// get predictions
cout << "Program for testing regression model \n" << endl;
/*
reg.REG_Get_weight_coeff(weights);
cout << "The weight coefficients are: " << kernel << endl;
for (i = 0; i < 451; i++)
cout << weights[i] << endl;
*/
while (not end)
{
cout << "Please enter the x value or y to end \n" << endl;
cin >> user_input;
if (user_input == "y")
{
end = true;
break;
}
x = stod(user_input);
reg.REG_Get_predictions(&x, &y, 1);
cout << "The model prediction is: " << y << endl;
/*
reg.REG_Get_Score(&x, &y, 1, reference_value, score);
cout << "The model prediction is: " << y << endl;
cout << "The reference value is: " << reference_value[0] << endl;
cout << "The score is: " << score[0] << endl;
RMSE = reg.REG_RMSE();
Loss = reg.Loss_Function();
cout << "The RMSE is: " << RMSE << endl;
cout << "The Loss is: " << Loss << endl;
reg.REG_Get_params(params, &kernel);
cout << "The parameters of model are: " << endl;
for (i = 0; i < 4; i++)
cout << "param: " << params[i] << endl;
cout << "The type of kernel is: " << kernel << endl;
cout << endl;
params[0] = 0;
kernel = "Gaussian";
reg.REG_Set_params(params, kernel);
reg.REG_Get_params(params, &kernel);
cout << "The new parameters o model are: " << RMSE << endl;
for (i = 0; i < 4; i++)
cout << "param: " << params[i] << endl;
cout << "The new type of kernel is: " << kernel << endl;
cout << endl;
*/
}
/*reg.REG_Save_fitted("reg_test_m.csv");
reg.REG_Load_fitted("reg_test_m.csv");
end = false;
while (not end)
{
cout << "Please enter the x value or y to end \n" << endl;
cin >> user_input;
if (user_input == "y")
{
end = true;
break;
}
x = stod(user_input);
reg.REG_Get_predictions(&x, &y, 1);
cout << "The model prediction is: " << y << endl;
}*/
return(0);
};

118
test_regsurf.cc Normal file
View File

@ -0,0 +1,118 @@
/*
LA: linear algebra C++ interface library
Kernel ridge regression module Copyright (C) 2024
Pavel Florian <florian43@seznam.cz> and Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "regsurf.h"
using namespace std;
using namespace LA;
int main(int argc, char *argv[])
{
unsigned int i, j;
unsigned int count_levels;
unsigned int count_kernels;
double x;
double y;
double RMSE;
double Loss;
bool end = false;
string user_input;
string model_geoms = "xyz.dat";
string model_y_values = "E_sum.dat";
string model_params = "conf_surf.dat";
string model_Z_file = "Z.dat";
string model_test_geom = "nea_geoms.xyz";
/*
double* weight_coeff;
double* params;
string model_weight_file;
string* descriptor;
string* kernel;
*/
count_levels = 10;
count_kernels = 12;/*
descriptor = new string[1];
kernel = new string[1];
weight_coeff = new double[count_kernels * count_levels];
params = new double[22];
model_weight_file = "weights.dat";
*/
REGSURF<double> regsurf(model_geoms, model_y_values, model_params, model_Z_file);
// creating model
// fitting model
regsurf.s_Fit(0.0, 100, 0.5);
//reg.REG_Fit_grad(0.1, 50, 0.2);
// get predictions
cout << "Program for testing regression model \n" << endl;
cout << "The root mean square errors: " << endl;
for (i = 0; i < count_levels; i++)
{
RMSE = regsurf.s_RMSE(i);
cout << RMSE << endl;
}
cout << "The values of Loss_function: " << endl;
for (i = 0; i < count_levels; i++)
{
Loss = regsurf.Loss_Function(i);
cout << Loss << endl;
}
/*
cout << "The weights of kernels: " << endl;
for (i = 0; i < count_levels; i++)
{
cout << "Level " << i << ": ";
regsurf.s_Get_weight_coeff(i, weight_coeff + (i * count_kernels));
for (j = 0; j < count_kernels; j++)
cout << weight_coeff[(i * count_kernels) + j] << endl;
}
regsurf.s_Save_fitted(model_weight_file);
regsurf.s_Load_fitted(model_weight_file);
regsurf.s_Get_params(descriptor, kernel, params);
cout << "The descriptor is: " << descriptor[0] << endl;
cout << "The kernel is: " << kernel[0] << endl;
cout << "The parameters of model are: ";
for (i = 0; i < (2 * count_levels) + 1; i++)
cout << params[i] << endl;
descriptor[0] = "Inverse_pairs";
kernel[0] = "Laplacian";
params[1] = 0.2;
regsurf.s_Set_params(descriptor[0], kernel[0], params);
regsurf.s_Get_params(descriptor, kernel, params);
cout << "The new descriptor is: " << descriptor[0] << endl;
cout << "The new kernel is: " << kernel[0] << endl;
cout << "The new parameters of model are: ";
for (i = 0; i < (2 * count_levels) + 1; i++)
cout << params[i] << endl;
*/
regsurf.s_Load_ML_NEA_geometries(50000, "nea_geoms.xyz");
regsurf.s_Predict_ML_NEA_geometries();
regsurf.s_Compute_ML_NEA_spectra(0.00, 0.0025, 4000, 0.2);
regsurf.s_Save_ML_NEA_spectra("spectra.dat");
return(0);
};