LA_library/regsurf.h
2024-02-21 15:04:34 +01:00

287 lines
15 KiB
C++

/*
LA: linear algebra C++ interface library
Kernel ridge regression module Copyright (C) 2024
Pavel Florian <florian43@seznam.cz> and Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef REGSURF_H
#define REGSURF_H
#include <iostream>
#include <string>
#include <cmath>
#include <bits/stdc++.h>
#include <fstream>
using namespace std;
# include "mat.h" // vector support libla implementation
# include "vec.h" // compiler parameters: -llapack -lblas
# include "reg.h"
namespace LA {
template <typename T>
unsigned int Build_s_RE_descriptor_mat(const unsigned int count_particles, const unsigned int count_geometries, const T* distances, T* desc, const T* reference_geometry);
template <typename T>
unsigned int Build_s_coulomb_matrix_descriptor_mat(const unsigned int count_particles, const unsigned int count_geometries, const T* distances, T* desc, const unsigned int* Z);
template <typename T>
unsigned int Build_s_inverse_pairs_descriptor_mat(const unsigned int count_particles, const unsigned int count_geometries, const T* distances, T* desc);
// The kernel regression of surfaces of geometries in in 1D, 2D, 3D and multi-D space
// and computing of ML-NEA absorption spectra
template <typename T>
class REGSURF { // REG for potential energy surfaces (PES) and other quantities surfaces
public:
// constructor of surface REG model from memory locations
REGSURF(const unsigned int count_dimensions, const unsigned int count_kernels, const unsigned int count_geometries,
const unsigned int count_levels, const string descriptor, const string kernel, const T* params, const T* geometries,
const T* values, const unsigned int* Z = nullptr);
// constructor of surface REG model from files
REGSURF(const string geometries_file, const string energies_file, const string parameters_file, const string Z_file = "");
// inicialization function of REG surface model
void Init(const unsigned int count_dimensions, const unsigned int count_kernels, const unsigned int count_geometries,
const unsigned int count_levels, const string descriptor, const string kernel, const T* params, const T* geometries,
const T* values, const unsigned int* Z);
// General kernel function
inline T Kernel(const T* x, const unsigned int dimensions, const unsigned int kernel_id);
// function for building matrix of geometry distances between kernels
unsigned int Build_s_distances_mat(const unsigned int count_geometries, const T* geometries, T* distances);
// function for building descriptor matrix
unsigned int Build_s_descriptor_mat(const unsigned int count_geometries, const T* distances, T* desc);
// Function, that build matrix of coefficients for kernel coordinates
unsigned int Build_s_kernels_mat(const unsigned int count_geometries, const T* desc, NRVec<NRMat<T>> &kernels_vec_mat);
// Function, that build sums of matrix coefficients for kernel coordinates
unsigned int Build_s_kernels_mat_sums();
// Function, that return root mean square error of model
T s_RMSE(unsigned int level); // sqrt(sum(delta_y^2)/n)
// The Loss function
T Loss_Function(unsigned int level); // sqrt(sum(delta_y^2)/n) + lambda((1 - alpha)/2 * sum(weight^2) + alpha * sum(abs(weight)))
unsigned int s_Fit(const T max_Loss, const unsigned int max_learning_cycles, const T learning_rate,
const bool element_invariant=true, unsigned int* kernel_classes=nullptr);
// Saving weight coefficients of regression surface model into file
unsigned int s_Save_fitted(const string weights);
// Loading weight coefficients of regression surface model from file
unsigned int s_Load_fitted(const string weights);
// This function is for getting predictions from kernel potential energy surface, geometry is loaded from memory
unsigned int s_Get_predictions(const unsigned int count_geometries, const unsigned int count_levels,
const T* geom, const unsigned int* surfaces, T* predictions);
// Function, that uses the regression surface to predict y for new data and investigate the model precision
unsigned int s_Score(const unsigned int count_geometries, const unsigned int count_levels,
const T* geom, const unsigned int* surfaces, T* predictions, T* y_values, T* score);
// Setting parameters of regression surface models
unsigned int s_Set_params(const string descriptor, const string kernel, const T* params);
// Getting parameters of regression surface models
unsigned int s_Get_params(string* descriptor, string* kernel, T* params);
// Setting the external descriptor function for computing the descriptor matrix
unsigned int s_Set_extern_descriptor(unsigned int (*extern_func)(const unsigned int, const unsigned int, const T*, T*));
// Getting weight coefficients of regression surface model
unsigned int s_Get_weight_coeff(const unsigned int s_number, T* weights);
// Loading geometries for ML-NEA calculations from memory
unsigned int s_Load_ML_NEA_geometries(const unsigned int count_geometries, const T* geometries);
// Loading geometries for ML-NEA calculations from file
unsigned int s_Load_ML_NEA_geometries(const unsigned int count_geometries, const string geometries_file);
// Calculation of energies for ML-NEA surfaces of geometries for all energy levels - ground and excited states
unsigned int s_Predict_ML_NEA_geometries();
// Calculation of ML-NEA energy spectra
unsigned int s_Compute_ML_NEA_spectra(const T E_min, const T step, const unsigned int count_steps, const T delta);
// Saving the normalized ML-NEA energy spectra to file
unsigned int s_Save_ML_NEA_spectra(const string ML_NEA_spectra_file);
// destructor of surface REG model
~REGSURF();
protected:
// string with setted type of kernels
string s_kernel_type;
// identification number of setted type of kernel
unsigned int s_kernel_id;
// string with selected type of descriptor for surface
string s_descriptor_type;
// identification number of selected type of descriptor
unsigned int s_descriptor_id;
// dimension of kernels in surface geometry
unsigned int s_count_dimensions;
// count of kernels in surface geometry
unsigned int s_count_kernels;
// count of geometries in surface
unsigned int s_count_geometries;
// count of levels/surfaces
unsigned int s_count_levels;
// lambda parameter of regression
T* s_lambda = nullptr;
// alpha parameter of regression
T* s_alpha = nullptr;
// sigma parameter of kernels
T s_sigma;
// n for Matern kernels
unsigned int s_n;
// count of geometries for ML-NEA model
T ML_NEA_count_geometries;
// minimum of energy in begin of s_ML_NEA_spectra_vec (atomic units)
T ML_NEA_energy_min;
// the energy step in s_ML_NEA_spectra_vec (atomic units)
T ML_NEA_energy_step;
// indicator of initialization of allocation of memory
bool s_aloc_init = false;
// indicator of initialization or initialization errors
bool s_base_init = false;
// indicator of initialization of s_distances_mat
bool s_distances_mat_init = false;
// indicator of initialization of s_descriptors_mat
bool s_descriptors_mat_init = false;
// indicator of initialization of s_kernels_mat
bool s_kernels_mat_init = false;
// indicator of using of extern descriptor functions
bool extern_descriptor = false;
// pointer to external descriptor function
unsigned int (*extern_desc)(const unsigned int, const unsigned int, const T*, T*) = nullptr;
// indicator of loading of ML_NEA input data
bool s_ML_NEA_input_init = false;
// indicator of computing of ML_NEA surfaces
bool s_ML_NEA_surf_init = false;
// indicator of computing of ML_NEA spectra
bool s_ML_NEA_spectra_init = false;
// the surface kernel parameters pointer, sigma parameter is first
T* s_params = nullptr;
// Matern kernel coefficient computed from n and k
T* s_Matern_comb_number = nullptr;
// numbers of geometries with minimum of y for levels [s_count_levels]
unsigned int* s_y_min = nullptr;
// The geometries data of each geometries [s_kernel_dimension * s_count_kernels * s_count_geometries]
T* s_geometries = nullptr;
// The surfaces y data values of each surfaces [s_count_geometries * s_count_surf]
T* s_y = nullptr;
// The surfaces Z proton numbers of atoms [s_count_kernels]
unsigned int* s_Z = nullptr;
// the sum of sums of kernel matrix for base geometry
T s_kernels_sum_sum;
// matrix of coordinate distances between kernels for each geometry [s_count_kernels * s_count_kernels * s_count_geometries]
T* s_distances = nullptr;
// descriptor matrices for each geometry [s_kernel_dimension * s_kernel_dimension * s_count_geometries]
T* s_desc = nullptr;
// matrices of kernel coefficients for each surface kernel geometry [s_count_kernels * s_count_kernels]
T* s_kernels = nullptr;
// sums of kernels rows for geometries [s_count_kernels]
T* s_kernels_rows_sums;
// matrix of differences of sum kernel rows between geometries [s_count_kernels]
T* s_kernels_diff;
// vector differences of sums kernels rows and columns between base and other geometries [s_count_geometries]
T* s_kernels_diff_sums;
// vectors of weight coefficients for each surface kernel and surface [s_count_kernels]
T* s_weights = nullptr;
// matrices of surface y energy/quantity predictions for each geometry and surface [s_count_geometries]
T* s_y_preds = nullptr;
// the surfaces difference y data values of each surfaces [s_count_geometries]
T* s_y_delta = nullptr;
// the surfaces temporary y data values of each surfaces [s_count_kernels]
T* s_y_temp = nullptr;
// the surfaces temporary y partial data values of each surfaces [count_kernels]
T* s_y_temp_part = nullptr;
// coordinate distances for geometry for predictions [s_count_kernels, s_count_kernels]
T* s_distances_pred;
// matrix of descriptors between kernels in geometry for predictions [s_count_kernels, s_count_kernels]
T* s_desc_pred;
// vector of matrix of kernel coefficients for kernels in geometry for predictions [count_kernels * count_kernels]
T* s_kernels_pred;
// pointer to x buffer for model loaded from file
T* s_geom_file = nullptr;
// pointer to y buffer for model loaded from file
T* s_y_file = nullptr;
// pointer to parameters buffer for model loaded from file
T* s_param_file = nullptr;
// pointer to fitted data for model loaded from file
T* s_fit_file = nullptr;
// pointer to fitted data for model loaded from Z file
unsigned int* s_Z_file = nullptr;
// pointer to ML-NEA geometries buffer for model loaded from file
T* s_ML_NEA_geom_file = nullptr;
// pointer to geometries loaded to ML_NEA model [s_count_dimensions * s_count_kernels, ML_NEA_count_geometries]
T* s_ML_NEA_geometries = nullptr;
// ML-NEA surfaces numbers
unsigned int* s_ML_NEA_surf_numbers;
// pointer to y values computed for ML_NEA model [ML_NEA_count_geometries, s_count_levels]
T* s_ML_NEA_y = nullptr;
// pointer to minimal y values in geometries computed for ML_NEA model [s_count_levels]
T* s_ML_NEA_min_y = nullptr;
// pointer to spectra computed for ML_NEA model [count_steps + 1]
T* s_ML_NEA_spectra = nullptr;
// NRMat for input loaded geometries [s_count_dimensions * s_count_kernels, count_geometries]
NRMat<T> s_geometries_mat;
// NRMat for vector of y of geomeries for surface [s_count_geometries, s_count_levels]
NRMat<T> s_y_mat;
// NRVec for vector of Z for coulomb matrix descriptor [s_count_kernels]
NRVec<unsigned int> s_Z_vec;
// NRMat for coordinate distances for geometries [s_count_kernels, s_count_kernels * s_count_geometries]
NRMat<T> s_distances_mat;
// NRMat for matrix of descriptors between kernels in geometry [s_count_kernels, s_count_kernels * s_count_geometries]
NRMat<T> s_desc_mat;
// NRVec of NRMat for matrix of kernel coefficients for kernels in geometry
// [count_geometries, count_kernels * count_kernels]
NRVec<NRMat<T>> s_kernels_vec_mat;
// NRVec of NRVec for sums of kernels rows for geometries [s_count_kernels * s_count_geometries]
NRVec<NRVec<T>> s_kernels_rows_sums_vec_vec;
// NRVec of NRVec for differences of sums kernels rows between base and other geometries [s_count_kernels, s_count_geometries]
NRVec<NRVec<T>> s_kernels_diff_vec_vec;
// NRVec for differences of sums kernels rows and columns between base and other geometries [s_count_geometries]
NRVec<T> s_kernels_diff_sums_vec;
// NRVec of NRVec for vectors of weight coefficients of surface kernels in surface [s_count_kernels, s_count_levels]
NRVec<NRVec<T>> s_weights_vec_vec;
// NRVec for vector of y of geomeries for surface [s_count_geometries]
NRVec<T> s_y_vec;
// NRVec for vector of y predictions of geomeries for surface [s_count_geometries]
NRVec<T> s_y_preds_vec;
// NRVec for vector of delta y values for surface [s_count_geometries]
NRVec<T> s_y_delta_vec;
// NRVec for vector of temporary y values for surface [s_count_kernels]
NRVec<T> s_y_temp_vec;
// NRVec for vector of temporary partial y values for geometry [s_count_kernels]
NRVec<T> s_y_temp_part_vec;
// NRMat for coordinate distances for geometry for predictions [s_count_kernels, s_count_kernels]
NRMat<T> s_distances_pred_mat;
// NRMat for matrix of descriptors between kernels in geometry for predictions [s_count_kernels, s_count_kernels]
NRMat<T> s_desc_pred_mat;
// NRVec of NRMat for matrix of kernel coefficients for kernels in geometry for predictions [count_kernels * count_kernels]
NRVec<NRMat<T>> s_kernels_pred_vec_mat;
// NRMat for ML-NEA geometries [s_count_dimensions * s_count_kernels, ML_NEA_count_geometries]
NRMat<T> s_ML_NEA_geometries_mat;
// NRVec for ML-NEA surfaces numbers
NRVec<unsigned int> s_ML_NEA_surf_numbers_vec;
// NRMat for ML-NEA energies [s_count_levels, ML_NEA_count_geometries]
NRMat<T> s_ML_NEA_y_mat;
// NRVec of minimal y values in geometries computed for ML_NEA model [s_count_levels]
NRVec<T> s_ML_NEA_min_y_vec;
// NRVec for ML-NEA energy spectra [count_steps + 1]
NRVec<T> s_ML_NEA_spectra_vec;
};
} // end of namespace
# endif /* REG_H */