mirror of https://github.com/nmap/nmap.git

Upgrade liblinear to 2.47

commit 34e0769329
parent 1fc984bc73
Author: dmiller
Date: 2024-02-28 18:18:35 +00:00

22 changed files with 2883 additions and 1054 deletions


@@ -1,7 +1,7 @@
 #Nmap Changelog ($Id$); -*-text-*-
 o Upgrade included libraries: Lua 5.4.6, libpcre2 10.43, zlib 1.3.1,
-  libssh2 1.11.0
+  libssh2 1.11.0, liblinear 2.47
 o [Zenmap][GH#2739] Fix a crash in Zenmap when changing a host comment.


@@ -69,9 +69,16 @@ EOC
 check_liblinear() {
 LINEAR_SOURCE="https://www.csie.ntu.edu.tw/~cjlin/liblinear/"
-echo "Can't check liblinear, no version information is available"
-LINEAR_LATEST=$(curl -Ls $LINEAR_SOURCE | perl -lne 'if(/The current release \(([^)]+)\) of <b>LIBLINEAR/){print $1;exit 0}')
-echo " Latest:" $LINEAR_LATEST
+LINEAR_VERSION=$(awk '$2=="LIBLINEAR_VERSION"{print$3;exit}' $NDIR/liblinear/linear.h | sed 's/./&./1')
+LINEAR_LATEST=$(curl -Ls $LINEAR_SOURCE | perl -lne 'if(/liblinear-([\d.]+).tar.gz/){print $1}' | newest)
+if [ "$LINEAR_VERSION" != "$LINEAR_LATEST" ]; then
+	echo "Newer version of liblinear available"
+	echo " Current:" $LINEAR_VERSION
+	echo " Latest: " $LINEAR_LATEST
+	echo " Source: $LINEAR_SOURCE"
+else
+	echo "liblinear: $LINEAR_VERSION"
+fi
 }
 check_zlib() {


@@ -1,5 +1,5 @@
-Copyright (c) 2007-2011 The LIBLINEAR Project.
+Copyright (c) 2007-2023 The LIBLINEAR Project.
 All rights reserved.
 Redistribution and use in source and binary forms, with or without


@@ -2,35 +2,42 @@ CXX ?= g++
 CC ?= gcc
 CFLAGS = -Wall -Wconversion -O3 -fPIC
 LIBS = blas/blas.a
-SHVER = 1
+#LIBS = -lblas
+SHVER = 5
 AR = ar
 RANLIB = ranlib
-#LIBS = -lblas
+OS = $(shell uname)
+ifeq ($(OS),Darwin)
+	SHARED_LIB_FLAG = -dynamiclib -Wl,-install_name,liblinear.so.$(SHVER)
+else
+	SHARED_LIB_FLAG = -shared -Wl,-soname,liblinear.so.$(SHVER)
+endif

 all: train predict

-lib: linear.o tron.o blas/blas.a
-	$(CXX) -shared -dynamiclib linear.o tron.o blas/blas.a -o liblinear.so.$(SHVER)
+lib: linear.o newton.o blas/blas.a
+	$(CXX) $(SHARED_LIB_FLAG) linear.o newton.o blas/blas.a -o liblinear.so.$(SHVER)

-liblinear.a: linear.o tron.o blas/blas.a
-	$(AR) rcv liblinear.a linear.o tron.o blas/*.o
+liblinear.a: linear.o newton.o blas/blas.a
+	$(AR) rcv liblinear.a linear.o newton.o blas/*.o
 	$(RANLIB) liblinear.a

-train: tron.o linear.o train.c blas/blas.a
-	$(CXX) $(CFLAGS) -o train train.c tron.o linear.o $(LIBS)
+train: newton.o linear.o train.c blas/blas.a
+	$(CXX) $(CFLAGS) -o train train.c newton.o linear.o $(LIBS)

-predict: tron.o linear.o predict.c blas/blas.a
-	$(CXX) $(CFLAGS) -o predict predict.c tron.o linear.o $(LIBS)
+predict: newton.o linear.o predict.c blas/blas.a
+	$(CXX) $(CFLAGS) -o predict predict.c newton.o linear.o $(LIBS)

-tron.o: tron.cpp tron.h
-	$(CXX) $(CFLAGS) -c -o tron.o tron.cpp
+newton.o: newton.cpp newton.h
+	$(CXX) $(CFLAGS) -c -o newton.o newton.cpp

 linear.o: linear.cpp linear.h
 	$(CXX) $(CFLAGS) -c -o linear.o linear.cpp

-blas/blas.a:
-	cd blas; $(MAKE) OPTFLAGS='$(CFLAGS)' CC='$(CC)';
+blas/blas.a: blas/*.c blas/*.h
+	make -C blas OPTFLAGS='$(CFLAGS)' CC='$(CC)';

 clean:
-	cd blas; $(MAKE) clean
-	rm -f *~ tron.o linear.o train predict liblinear.so.$(SHVER) liblinear.a
+	make -C blas clean
+	make -C matlab clean
+	rm -f *~ newton.o linear.o train predict liblinear.so.$(SHVER)


@@ -1,30 +1,24 @@
-#You must ensure nmake.exe, cl.exe, link.exe are in system path.
-#VCVARS32.bat
-#Under dosbox prompt
-#nmake -f Makefile.win
-##########################################
-CXXC = cl.exe
-CFLAGS = -nologo -O2 -EHsc -I. -D __WIN32__ -D _CRT_SECURE_NO_DEPRECATE
+CXX = cl.exe
+CFLAGS = /nologo /O2 /EHsc /I. /D _WIN64 /D _CRT_SECURE_NO_DEPRECATE
 TARGET = windows

-all: $(TARGET)\train.exe $(TARGET)\predict.exe
+all: $(TARGET)\train.exe $(TARGET)\predict.exe lib

-$(TARGET)\train.exe: tron.obj linear.obj train.c blas\*.c
-	$(CXX) $(CFLAGS) -Fe$(TARGET)\train.exe tron.obj linear.obj train.c blas\*.c
+$(TARGET)\train.exe: newton.obj linear.obj train.c blas\*.c
+	$(CXX) $(CFLAGS) -Fe$(TARGET)\train.exe newton.obj linear.obj train.c blas\*.c

-$(TARGET)\predict.exe: tron.obj linear.obj predict.c blas\*.c
-	$(CXX) $(CFLAGS) -Fe$(TARGET)\predict.exe tron.obj linear.obj predict.c blas\*.c
+$(TARGET)\predict.exe: newton.obj linear.obj predict.c blas\*.c
+	$(CXX) $(CFLAGS) -Fe$(TARGET)\predict.exe newton.obj linear.obj predict.c blas\*.c

 linear.obj: linear.cpp linear.h
 	$(CXX) $(CFLAGS) -c linear.cpp

-tron.obj: tron.cpp tron.h
-	$(CXX) $(CFLAGS) -c tron.cpp
+newton.obj: newton.cpp newton.h
+	$(CXX) $(CFLAGS) -c newton.cpp

-lib: linear.cpp linear.h linear.def tron.obj
-	$(CXX) $(CFLAGS) -LD linear.cpp tron.obj blas\*.c -Fe$(TARGET)\liblinear -link -DEF:linear.def
+lib: linear.cpp linear.h linear.def newton.obj
+	$(CXX) $(CFLAGS) -LD linear.cpp newton.obj blas\*.c -Fe$(TARGET)\liblinear -link -DEF:linear.def

 clean:
-	-erase /Q *.obj $(TARGET)\.
+	-erase /Q *.obj $(TARGET)\*.exe $(TARGET)\*.dll $(TARGET)\*.exp $(TARGET)\*.lib


@@ -1,8 +1,10 @@
-LIBLINEAR is a simple package for solving large-scale regularized
-linear classification. It currently supports L2-regularized logistic
-regression/L2-loss support vector classification/L1-loss support vector
-classification, and L1-regularized L2-loss support vector classification/
-logistic regression. This document explains the usage of LIBLINEAR.
+LIBLINEAR is a simple package for solving large-scale regularized linear
+classification, regression and outlier detection. It currently supports
+- L2-regularized logistic regression/L2-loss support vector classification/L1-loss support vector classification
+- L1-regularized L2-loss support vector classification/L1-regularized logistic regression
+- L2-regularized L2-loss support vector regression/L1-loss support vector regression
+- one-class support vector machine.
+
+This document explains the usage of LIBLINEAR.

 To get started, please read the ``Quick Start'' section first.
 For developers, please check the ``Library Usage'' section to learn
@@ -16,20 +18,21 @@ Table of Contents
 - Installation
 - `train' Usage
 - `predict' Usage
+- `svm-scale' Usage
 - Examples
 - Library Usage
 - Building Windows Binaries
-- Additional Information
 - MATLAB/OCTAVE interface
-- PYTHON interface
+- Python Interface
+- Additional Information

 When to use LIBLINEAR but not LIBSVM
 ====================================

 There are some large data for which with/without nonlinear mappings
 gives similar performances. Without using kernels, one can
-efficiently train a much larger set via a linear classifier. These
-data usually have a large number of features. Document classification
+efficiently train a much larger set via linear classification/regression.
+These data usually have a large number of features. Document classification
 is an example.

 Warning: While generally liblinear is very fast, its default solver
@@ -74,8 +77,8 @@ sparse data, use `-l 0' to keep the sparsity.
 Installation
 ============

-On Unix systems, type `make' to build the `train' and `predict'
-programs. Run them without arguments to show the usages.
+On Unix systems, type `make' to build the `train', `predict',
+and `svm-scale' programs. Run them without arguments to show the usages.

 On other systems, consult `Makefile' to build them (e.g., see
 'Building Windows binaries' in this file) or use the pre-built
@@ -85,11 +88,13 @@ This software uses some level-1 BLAS subroutines. The needed functions are
 included in this package. If a BLAS library is available on your
 machine, you may use it by modifying the Makefile: Unmark the following line

-#LIBS ?= -lblas
+#LIBS = -lblas

 and mark

-LIBS ?= blas/blas.a
+LIBS = blas/blas.a

+The tool `svm-scale', borrowed from LIBSVM, is for scaling input data file.
+
 `train' Usage
 =============
@@ -97,33 +102,55 @@ and mark
 Usage: train [options] training_set_file [model_file]
 options:
 -s type : set type of solver (default 1)
+  for multi-class classification
 	0 -- L2-regularized logistic regression (primal)
 	1 -- L2-regularized L2-loss support vector classification (dual)
 	2 -- L2-regularized L2-loss support vector classification (primal)
 	3 -- L2-regularized L1-loss support vector classification (dual)
-	4 -- multi-class support vector classification by Crammer and Singer
+	4 -- support vector classification by Crammer and Singer
 	5 -- L1-regularized L2-loss support vector classification
 	6 -- L1-regularized logistic regression
 	7 -- L2-regularized logistic regression (dual)
+  for regression
+	11 -- L2-regularized L2-loss support vector regression (primal)
+	12 -- L2-regularized L2-loss support vector regression (dual)
+	13 -- L2-regularized L1-loss support vector regression (dual)
+  for outlier detection
+	21 -- one-class support vector machine (dual)
 -c cost : set the parameter C (default 1)
+-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
+-n nu : set the parameter nu of one-class SVM (default 0.5)
 -e epsilon : set tolerance of termination criterion
 	-s 0 and 2
 		|f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,
 		where f is the primal function and pos/neg are # of
 		positive/negative data (default 0.01)
-	-s 1, 3, 4 and 7
-		Dual maximal violation <= eps; similar to libsvm (default 0.1)
+	-s 11
+		|f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.0001)
+	-s 1, 3, 4, 7, and 21
+		Dual maximal violation <= eps; similar to libsvm (default 0.1 except 0.01 for -s 21)
 	-s 5 and 6
-		|f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf,
+		|f'(w)|_1 <= eps*min(pos,neg)/l*|f'(w0)|_1,
 		where f is the primal function (default 0.01)
+	-s 12 and 13
+		|f'(alpha)|_1 <= eps |f'(alpha0)|,
+		where f is the dual function (default 0.1)
 -B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
+-R : not regularize the bias; must with -B 1 to have the bias; DON'T use this unless you know what it is
+	(for -s 0, 2, 5, 6, 11)
 -wi weight: weights adjust the parameter C of different classes (see README for details)
 -v n: n-fold cross validation mode
+-C : find parameters (C for -s 0, 2 and C, p for -s 11)
 -q : quiet mode (no outputs)

 Option -v randomly splits the data into n parts and calculates cross
 validation accuracy on them.

+Option -C conducts cross validation under different parameters and finds
+the best one. This option is supported only by -s 0, -s 2 (for finding
+C) and -s 11 (for finding C, p). If the solver is not specified, -s 2
+is used.
+
 Formulations:

 For L2-regularized logistic regression (-s 0), we solve
@@ -152,23 +179,57 @@ For L1-regularized logistic regression (-s 6), we solve
 min_w \sum |w_j| + C \sum log(1 + exp(-y_i w^Tx_i))

+where
+
+Q is a matrix with Q_ij = y_i y_j x_i^T x_j.
+
 For L2-regularized logistic regression (-s 7), we solve

 min_alpha 0.5(alpha^T Q alpha) + \sum alpha_i*log(alpha_i) + \sum (C-alpha_i)*log(C-alpha_i) - a constant
     s.t.   0 <= alpha_i <= C,

-If bias >= 0, w becomes [w; w_{n+1}] and x becomes [x; bias].
+where
+
+Q is a matrix with Q_ij = y_i y_j x_i^T x_j.
+
+For L2-regularized L2-loss SVR (-s 11), we solve
+
+min_w w^Tw/2 + C \sum max(0, |y_i-w^Tx_i|-epsilon)^2
+
+For L2-regularized L2-loss SVR dual (-s 12), we solve
+
+min_beta 0.5(beta^T (Q + lambda I/2/C) beta) - y^T beta + \sum |beta_i|
+
+For L2-regularized L1-loss SVR dual (-s 13), we solve
+
+min_beta 0.5(beta^T Q beta) - y^T beta + \sum |beta_i|
+    s.t. -C <= beta_i <= C,
+
+where
+
+Q is a matrix with Q_ij = x_i^T x_j.
+
+For one-class SVM dual (-s 21), we solve
+
+min_alpha 0.5(alpha^T Q alpha)
+    s.t. 0 <= alpha_i <= 1 and \sum alpha_i = nu*l,
+
+where
+
+Q is a matrix with Q_ij = x_i^T x_j.
+
+If bias >= 0, w becomes [w; w_{n+1}] and x becomes [x; bias]. For
+example, L2-regularized logistic regression (-s 0) becomes
+
+min_w w^Tw/2 + (w_{n+1})^2/2 + C \sum log(1 + exp(-y_i [w; w_{n+1}]^T[x_i; bias]))
+
+Some may prefer not having (w_{n+1})^2/2 (i.e., bias variable not
+regularized). For primal solvers (-s 0, 2, 5, 6, 11), we provide an
+option -R to remove (w_{n+1})^2/2. However, -R is generally not needed
+as for most data with/without (w_{n+1})^2/2 give similar performances.
+
 The primal-dual relationship implies that -s 1 and -s 2 give the same
-model, and -s 0 and -s 7 give the same.
+model, -s 0 and -s 7 give the same, and -s 11 and -s 12 give the same.

-We implement 1-vs-the rest multi-class strategy. In training i
-vs. non_i, their C parameters are (weight from -wi)*C and C,
-respectively. If there are only two classes, we train only one
-model. Thus weight1*C vs. weight2*C is used. See examples below.
+We implement 1-vs-the rest multi-class strategy for classification.
+In training i vs. non_i, their C parameters are (weight from -wi)*C
+and C, respectively. If there are only two classes, we train only one
+model. Thus weight1*C vs. weight2*C is used. See examples below.

 We also implement multi-class SVM by Crammer and Singer (-s 4):
@@ -193,7 +254,16 @@ and C^m_i = C if m = y_i,
 Usage: predict [options] test_file model_file output_file
 options:
--b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0)
+-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only
+-q : quiet mode (no outputs)
+
+Note that -b is only needed in the prediction phase. This is different
+from the setting of LIBSVM.
+
+`svm-scale' Usage
+=================
+
+See LIBSVM README.

 Examples
 ========
@@ -206,12 +276,38 @@ Train linear SVM with L2-loss function.
 Train a logistic regression model.

+> train -s 21 -n 0.1 data_file
+
+Train a linear one-class SVM which selects roughly 10% data as outliers.
+
 > train -v 5 -e 0.001 data_file

-Do five-fold cross-validation using L2-loss svm.
+Do five-fold cross-validation using L2-loss SVM.
 Use a smaller stopping tolerance 0.001 than the default
 0.1 if you want more accurate solutions.

+> train -C data_file
+...
+Best C = 0.000488281 CV accuracy = 83.3333%
+> train -c 0.000488281 data_file
+
+Conduct cross validation many times by L2-loss SVM and find the
+parameter C which achieves the best cross validation accuracy. Then
+use the selected C to train the data for getting a model.
+
+> train -C -s 0 -v 3 -c 0.5 -e 0.0001 data_file
+
+For parameter selection by -C, users can specify other
+solvers (currently -s 0, -s 2 and -s 11 are supported) and
+different number of CV folds. Further, users can use
+the -c option to specify the smallest C value of the
+search range. This option is useful when users want to
+rerun the parameter selection procedure from a specified
+C under a different setting, such as a stricter stopping
+tolerance -e 0.0001 in the above example. Similarly, for
+-s 11, users can use the -p option to specify the
+maximal p value of the search range.
+
 > train -c 10 -w1 2 -w2 5 -w3 2 four_class_data_file

 Train four classifiers:
@@ -233,18 +329,24 @@ Output probability estimates (for logistic regression only).
 Library Usage
 =============

+These functions and structures are declared in the header file `linear.h'.
+You can see `train.c' and `predict.c' for examples showing how to use them.
+We define LIBLINEAR_VERSION and declare `extern int liblinear_version; '
+in linear.h, so you can check the version number.
+
 - Function: model* train(const struct problem *prob,
             const struct parameter *param);

-    This function constructs and returns a linear classification model
-    according to the given training data and parameters.
+    This function constructs and returns a linear classification
+    or regression model according to the given training data and
+    parameters.

     struct problem describes the problem:

         struct problem
         {
             int l, n;
-            int *y;
+            double *y;
             struct feature_node **x;
             double bias;
         };
@@ -252,10 +354,10 @@ Library Usage
     where `l' is the number of training data. If bias >= 0, we assume
     that one additional feature is added to the end of each data
     instance. `n' is the number of feature (including the bias feature
-    if bias >= 0). `y' is an array containing the target values. And
-    `x' is an array of pointers,
-    each of which points to a sparse representation (array of feature_node) of one
-    training vector.
+    if bias >= 0). `y' is an array containing the target values. (integers
+    in classification, real numbers in regression) And `x' is an array
+    of pointers, each of which points to a sparse representation (array
+    of feature_node) of one training vector.

     For example, if we have the following training data:
@@ -280,32 +382,44 @@ Library Usage
     [ ] -> (2,0.1) (4,1.4) (5,0.5) (6,1) (-1,?)
     [ ] -> (1,-0.1) (2,-0.2) (3,0.1) (4,1.1) (5,0.1) (6,1) (-1,?)

-    struct parameter describes the parameters of a linear classification model:
+    struct parameter describes the parameters of a linear classification
+    or regression model:

         struct parameter
         {
             int solver_type;

             /* these are for training only */
-            double eps;     /* stopping criteria */
+            double eps;     /* stopping tolerance */
             double C;
+            double nu;      /* one-class SVM only */
             int nr_weight;
             int *weight_label;
             double* weight;
+            double p;
+            double *init_sol;
         };

-    solver_type can be one of L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL.
+    solver_type can be one of L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL, ONECLASS_SVM.

+    for classification
       L2R_LR                L2-regularized logistic regression (primal)
       L2R_L2LOSS_SVC_DUAL   L2-regularized L2-loss support vector classification (dual)
       L2R_L2LOSS_SVC        L2-regularized L2-loss support vector classification (primal)
       L2R_L1LOSS_SVC_DUAL   L2-regularized L1-loss support vector classification (dual)
-      MCSVM_CS              multi-class support vector classification by Crammer and Singer
+      MCSVM_CS              support vector classification by Crammer and Singer
       L1R_L2LOSS_SVC        L1-regularized L2-loss support vector classification
       L1R_LR                L1-regularized logistic regression
       L2R_LR_DUAL           L2-regularized logistic regression (dual)
+    for regression
+      L2R_L2LOSS_SVR        L2-regularized L2-loss support vector regression (primal)
+      L2R_L2LOSS_SVR_DUAL   L2-regularized L2-loss support vector regression (dual)
+      L2R_L1LOSS_SVR_DUAL   L2-regularized L1-loss support vector regression (dual)
+    for outlier detection
+      ONECLASS_SVM          one-class support vector machine (dual)

     C is the cost of constraints violation.
+    p is the sensitiveness of loss of support vector regression.
+    nu in ONECLASS_SVM approximates the fraction of data as outliers.
     eps is the stopping criterion.

     nr_weight, weight_label, and weight are used to change the penalty
@@ -320,6 +434,10 @@ Library Usage
     If you do not want to change penalty for any of the classes,
     just set nr_weight to 0.

+    init_sol includes the initial weight vectors (supported for only some
+    solvers). See the explanation of the vector w in the model
+    structure.
+
     *NOTE* To avoid wrong parameters, check_parameter() should be
     called before train().
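As a quick orientation to the API shape documented above, here is a minimal illustrative sketch (not part of this commit); the tiny inline dataset is made up, and the code simply fills struct problem and struct parameter, validates them, and trains:

#include <stdio.h>
#include "linear.h"

int main(void)
{
	/* Two training vectors with two features each; index -1 ends a vector. */
	struct feature_node x0[] = { {1,  1.0}, {2, 0.5}, {-1, 0.0} };
	struct feature_node x1[] = { {1, -1.0}, {2, 2.0}, {-1, 0.0} };
	struct feature_node *x[] = { x0, x1 };
	double y[] = { +1, -1 };                    /* labels are double since liblinear 2.x */

	struct problem prob = { 2, 2, y, x, -1 };   /* l, n, y, x, bias */

	struct parameter param = { 0 };
	param.solver_type = L2R_L2LOSS_SVC_DUAL;
	param.C = 1;
	param.eps = 0.1;
	param.p = 0.1;               /* SVR epsilon, unused for this solver */
	param.nu = 0.5;              /* one-class nu, unused for this solver */
	param.regularize_bias = 1;   /* default behaviour (no -R) */

	const char *err = check_parameter(&prob, &param);   /* call before train() */
	if (err != NULL) { fprintf(stderr, "ERROR: %s\n", err); return 1; }

	struct model *m = train(&prob, &param);
	printf("trained: %d classes, %d features\n", get_nr_class(m), get_nr_feature(m));
	free_and_destroy_model(&m);
	return 0;
}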
@@ -333,13 +451,21 @@ Library Usage
             double *w;
             int *label;      /* label of each class */
             double bias;
+            double rho;      /* one-class SVM only */
         };

     param describes the parameters used to obtain the model.

-    nr_class and nr_feature are the number of classes and features, respectively.
+    nr_class is the number of classes for classification. It is a
+    non-negative integer with special cases of 0 (no training data at
+    all) and 1 (all training data in one class). For regression and
+    one-class SVM, nr_class = 2.

-    The nr_feature*nr_class array w gives feature weights. We use one
+    nr_feature is the number of features.
+
+    The array w gives feature weights. Its size is
+    nr_feature*nr_class but is nr_feature if nr_class = 2 and the
+    solver is not MCSVM_CS (see more explanation below). We use one
     against the rest for multi-class classification, so each feature
     index corresponds to nr_class weight values. Weights are
     organized in the following way
@@ -349,13 +475,19 @@ Library Usage
     | for 1st feature | for 2nd feature |
     +------------------+------------------+------------+

+    The array label stores class labels.
+
+    When nr_class = 1 or 2, classification solvers (MCSVM_CS
+    excluded) return a single vector of weights by considering
+    label[0] as positive in training.
+
     If bias >= 0, x becomes [x; bias]. The number of features is
     increased by one, so w is a (nr_feature+1)*nr_class array. The
     value of bias is stored in the variable bias.

-    The array label stores class labels.
+    rho is the bias term used in one-class SVM only.

-- Function: void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target);
+- Function: void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target);

     This function conducts cross validation. Data are separated to
     nr_fold folds. Under given parameters, sequentially each fold is
@@ -365,23 +497,53 @@ Library Usage
     The format of prob is same as that for train().

-- Function: int predict(const model *model_, const feature_node *x);
+- Function: void find_parameters(const struct problem *prob,
+            const struct parameter *param, int nr_fold, double start_C,
+            double start_p, double *best_C, double *best_p, double *best_score);

-    This functions classifies a test vector using the given
-    model. The predicted label is returned.
+    This function is similar to cross_validation. However, instead of
+    conducting cross validation under specified parameters. For -s 0, 2, it
+    conducts cross validation many times under parameters C = start_C,
+    2*start_C, 4*start_C, 8*start_C, ..., and finds the best one with
+    the highest cross validation accuracy. For -s 11, it conducts cross
+    validation many times with a two-fold loop. The outer loop considers a
+    default sequence of p = 19/20*max_p, ..., 1/20*max_p, 0 and
+    under each p value the inner loop considers a sequence of parameters
+    C = start_C, 2*start_C, 4*start_C, ..., and finds the best one with the
+    lowest mean squared error.

-- Function: int predict_values(const struct model *model_,
+    If start_C <= 0, then this procedure calculates a small enough C
+    for prob as the start_C. The procedure stops when the models of
+    all folds become stable or C reaches max_C.
+
+    If start_p <= 0, then this procedure calculates a maximal p for prob as
+    the start_p. Otherwise, the procedure starts with the first
+    i/20*max_p <= start_p so the outer sequence is i/20*max_p,
+    (i-1)/20*max_p, ..., 0.
+
+    The best C, the best p, and the corresponding accuracy (or MSE) are
+    assigned to *best_C, *best_p and *best_score, respectively. For
+    classification, *best_p is not used, and the returned value is -1.
+
+- Function: double predict(const model *model_, const feature_node *x);
+
+    For a classification model, the predicted class for x is returned.
+    For a regression model, the function value of x calculated using
+    the model is returned.
+
+- Function: double predict_values(const struct model *model_,
             const struct feature_node *x, double* dec_values);

-    This function gives nr_w decision values in the array
-    dec_values. nr_w is 1 if there are two classes except multi-class
-    svm by Crammer and Singer (-s 4), and is the number of classes otherwise.
+    This function gives nr_w decision values in the array dec_values.
+    nr_w=1 if regression is applied or the number of classes is two. An exception is
+    multi-class SVM by Crammer and Singer (-s 4), where nr_w = 2 if there are two classes. For all other situations, nr_w is the
+    number of classes.

-    We implement one-vs-the rest multi-class strategy (-s 0,1,2,3) and
-    multi-class svm by Crammer and Singer (-s 4) for multi-class SVM.
+    We implement one-vs-the rest multi-class strategy (-s 0,1,2,3,5,6,7)
+    and multi-class SVM by Crammer and Singer (-s 4) for multi-class SVM.
     The class with the highest decision value is returned.
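A minimal illustrative sketch (not part of this commit) of the double-returning prediction calls described above; the model file name and the test vector are made up:

#include <stdio.h>
#include <stdlib.h>
#include "linear.h"

int main(void)
{
	struct model *m = load_model("data_file.model");   /* hypothetical model file */
	if (m == NULL) { fprintf(stderr, "cannot load model\n"); return 1; }

	/* Sparse test vector; index -1 terminates it. */
	struct feature_node x[] = { {1, 0.1}, {3, 1.2}, {-1, 0.0} };

	double label = predict(m, x);       /* class label, or regression value */

	/* nr_class is an upper bound on nr_w, so this buffer is always large enough. */
	int nr_class = get_nr_class(m);
	double *dec = malloc(sizeof(double) * (nr_class > 0 ? nr_class : 1));
	predict_values(m, x, dec);

	printf("predicted %g, first decision value %g\n", label, dec[0]);

	free(dec);
	free_and_destroy_model(&m);
	return 0;
}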
-- Function: int predict_probability(const struct model *model_,
+- Function: double predict_probability(const struct model *model_,
             const struct feature_node *x, double* prob_estimates);

     This function gives nr_class probability estimates in the array
@@ -397,10 +559,36 @@ Library Usage
 - Function: int get_nr_class(const model *model_);

     The function gives the number of classes of the model.
+    For a regression model, 2 is returned.

 - Function: void get_labels(const model *model_, int* label);

     This function outputs the name of labels into an array called label.
+    For a regression model, label is unchanged.
+
+- Function: double get_decfun_coef(const struct model *model_, int feat_idx,
+            int label_idx);
+
+    This function gives the coefficient for the feature with feature index =
+    feat_idx and the class with label index = label_idx. Note that feat_idx
+    starts from 1, while label_idx starts from 0. If feat_idx is not in the
+    valid range (1 to nr_feature), then a zero value will be returned. For
+    classification models, if label_idx is not in the valid range (0 to
+    nr_class-1), then a zero value will be returned; for regression models
+    and one-class SVM models, label_idx is ignored.
+
+- Function: double get_decfun_bias(const struct model *model_, int label_idx);
+
+    This function gives the bias term corresponding to the class with the
+    label_idx. For classification models, if label_idx is not in a valid range
+    (0 to nr_class-1), then a zero value will be returned; for regression
+    models, label_idx is ignored. This function cannot be called for a one-class
+    SVM model.
+
+- Function: double get_decfun_rho(const struct model *model_);
+
+    This function gives rho, the bias term used in one-class SVM only. This
+    function can only be called for a one-class SVM model.

 - Function: const char *check_parameter(const struct problem *prob,
             const struct parameter *param);
@@ -410,6 +598,21 @@ Library Usage
     train() and cross_validation(). It returns NULL if the
     parameters are feasible, otherwise an error message is returned.

+- Function: int check_probability_model(const struct model *model);
+
+    This function returns 1 if the model supports probability output;
+    otherwise, it returns 0.
+
+- Function: int check_regression_model(const struct model *model);
+
+    This function returns 1 if the model is a regression model; otherwise
+    it returns 0.
+
+- Function: int check_oneclass_model(const struct model *model);
+
+    This function returns 1 if the model is a one-class SVM model; otherwise
+    it returns 0.
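An illustrative sketch (not from this commit) of branching on the new model-type predicates after loading a saved model; the file path is hypothetical:

#include <stdio.h>
#include "linear.h"

static void describe(const struct model *m)
{
	if (check_oneclass_model(m))
		printf("one-class SVM model, rho = %g\n", get_decfun_rho(m));
	else if (check_regression_model(m))
		printf("regression model with %d features\n", get_nr_feature(m));
	else
		printf("classification model with %d classes\n", get_nr_class(m));
}

int main(void)
{
	struct model *m = load_model("data_file.model");   /* hypothetical path */
	if (m == NULL)
		return 1;
	describe(m);
	free_and_destroy_model(&m);
	return 0;
}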
 - Function: int save_model(const char *model_file_name,
             const struct model *model_);
@@ -443,13 +646,13 @@ Library Usage
 Building Windows Binaries
 =========================

-Windows binaries are in the directory `windows'. To build them via
-Visual C++, use the following steps:
+Windows binaries are available in the directory `windows'. To re-build
+them via Visual C++, use the following steps:

 1. Open a dos command box and change to liblinear directory. If
 environment variables of VC++ have not been set, type

-"C:\Program Files\Microsoft Visual Studio 10.0\VC\bin\vcvars32.bat"
+"C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"

 You may have to modify the above command according which version of
 VC++ or where it is installed.
@@ -458,13 +661,20 @@ VC++ or where it is installed.
 nmake -f Makefile.win clean all

+3. (optional) To build shared library liblinear.dll, type
+
+nmake -f Makefile.win lib
+
+4. (Optional) To build 32-bit windows binaries, you must
+	(1) Setup "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars32.bat" instead of vcvars64.bat
+	(2) Change CFLAGS in Makefile.win: /D _WIN64 to /D _WIN32
+
 MATLAB/OCTAVE Interface
 =======================

 Please check the file README in the directory `matlab'.

-PYTHON Interface
+Python Interface
 ================

 Please check the file README in the directory `python'.


@@ -1,7 +1,7 @@
-AR = ar
-RANLIB = ranlib
+AR ?= ar
+RANLIB ?= ranlib

-HEADERS = blas.h blas.h blasp.h
+HEADERS = blas.h blasp.h
 FILES = dnrm2.o daxpy.o ddot.o dscal.o

 CFLAGS = $(OPTFLAGS)


@@ -3,6 +3,10 @@
 /* Functions listed in alphabetical order */

+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #ifdef F2C_COMPAT

 void cdotc_(fcomplex *dotval, int *n, fcomplex *cx, int *incx,
@@ -428,3 +432,7 @@ int ztrsm_(char *side, char *uplo, char *transa, char *diag, int *m,
 int ztrsv_(char *uplo, char *trans, char *diag, int *n, dcomplex *a,
            int *lda, dcomplex *x, int *incx);
+
+#ifdef __cplusplus
+}
+#endif


@@ -1,5 +1,9 @@
 #include "blas.h"

+#ifdef __cplusplus
+extern "C" {
+#endif
+
 int daxpy_(int *n, double *sa, double *sx, int *incx, double *sy,
            int *incy)
 {
@@ -47,3 +51,7 @@ int daxpy_(int *n, double *sa, double *sx, int *incx, double *sy,
     return 0;
 } /* daxpy_ */
+
+#ifdef __cplusplus
+}
+#endif


@@ -1,5 +1,9 @@
 #include "blas.h"

+#ifdef __cplusplus
+extern "C" {
+#endif
+
 double ddot_(int *n, double *sx, int *incx, double *sy, int *incy)
 {
     long int i, m, nn, iincx, iincy;
@@ -48,3 +52,7 @@ double ddot_(int *n, double *sx, int *incx, double *sy, int *incy)
     return stemp;
 } /* ddot_ */
+
+#ifdef __cplusplus
+}
+#endif


@@ -1,6 +1,10 @@
 #include <math.h>  /* Needed for fabs() and sqrt() */
 #include "blas.h"

+#ifdef __cplusplus
+extern "C" {
+#endif
+
 double dnrm2_(int *n, double *x, int *incx)
 {
     long int ix, nn, iincx;
@@ -60,3 +64,7 @@ double dnrm2_(int *n, double *x, int *incx)
     return norm;
 } /* dnrm2_ */
+
+#ifdef __cplusplus
+}
+#endif


@@ -1,5 +1,9 @@
 #include "blas.h"

+#ifdef __cplusplus
+extern "C" {
+#endif
+
 int dscal_(int *n, double *sa, double *sx, int *incx)
 {
     long int i, m, nincx, nn, iincx;
@@ -42,3 +46,7 @@ int dscal_(int *n, double *sa, double *sx, int *incx)
     return 0;
 } /* dscal_ */
+
+#ifdef __cplusplus
+}
+#endif


@@ -16,13 +16,13 @@
     <ClCompile Include="blas\dnrm2.c" />
     <ClCompile Include="blas\dscal.c" />
     <ClCompile Include="linear.cpp" />
-    <ClCompile Include="tron.cpp" />
+    <ClCompile Include="newton.cpp" />
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="blas\blas.h" />
     <ClInclude Include="blas\blasp.h" />
     <ClInclude Include="linear.h" />
-    <ClInclude Include="tron.h" />
+    <ClInclude Include="newton.h" />
   </ItemGroup>
   <PropertyGroup Label="Globals">
     <ProjectGuid>{A7BE3D76-F20C-40C5-8986-DE4028B3B57D}</ProjectGuid>

File diff suppressed because it is too large.


@@ -16,3 +16,9 @@ EXPORTS
 check_parameter @14
 check_probability_model @15
 set_print_string_function @16
+get_decfun_coef @17
+get_decfun_bias @18
+check_regression_model @19
+find_parameters @20
+get_decfun_rho @21
+check_oneclass_model @22


@@ -1,10 +1,14 @@
 #ifndef _LIBLINEAR_H
 #define _LIBLINEAR_H

+#define LIBLINEAR_VERSION 247
+
 #ifdef __cplusplus
 extern "C" {
 #endif

+extern int liblinear_version;
+
 struct feature_node
 {
 	int index;
@@ -14,23 +18,27 @@ struct feature_node
 struct problem
 {
 	int l, n;
-	int *y;
+	double *y;
 	struct feature_node **x;
 	double bias;            /* < 0 if no bias term */
 };

-enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL }; /* solver_type */
+enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR = 11, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL, ONECLASS_SVM = 21 }; /* solver_type */

 struct parameter
 {
 	int solver_type;

 	/* these are for training only */
-	double eps;             /* stopping criteria */
+	double eps;             /* stopping tolerance */
 	double C;
 	int nr_weight;
 	int *weight_label;
 	double* weight;
+	double p;
+	double nu;
+	double *init_sol;
+	int regularize_bias;
 };

 struct model
@@ -41,14 +49,16 @@ struct model
 	double *w;
 	int *label;             /* label of each class */
 	double bias;
+	double rho;             /* one-class SVM only */
 };

 struct model* train(const struct problem *prob, const struct parameter *param);
-void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, int *target);
+void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);
+void find_parameters(const struct problem *prob, const struct parameter *param, int nr_fold, double start_C, double start_p, double *best_C, double *best_p, double *best_score);

-int predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
-int predict(const struct model *model_, const struct feature_node *x);
-int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates);
+double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
+double predict(const struct model *model_, const struct feature_node *x);
+double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates);

 int save_model(const char *model_file_name, const struct model *model_);
 struct model *load_model(const char *model_file_name);
@@ -56,6 +66,9 @@ struct model *load_model(const char *model_file_name);
 int get_nr_feature(const struct model *model_);
 int get_nr_class(const struct model *model_);
 void get_labels(const struct model *model_, int* label);
+double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx);
+double get_decfun_bias(const struct model *model_, int label_idx);
+double get_decfun_rho(const struct model *model_);

 void free_model_content(struct model *model_ptr);
 void free_and_destroy_model(struct model **model_ptr_ptr);
@@ -63,6 +76,8 @@ void destroy_param(struct parameter *param);
 const char *check_parameter(const struct problem *prob, const struct parameter *param);
 int check_probability_model(const struct model *model);
+int check_regression_model(const struct model *model);
+int check_oneclass_model(const struct model *model);
 void set_print_string_function(void (*print_func) (const char*));

 #ifdef __cplusplus
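Since linear.h now carries both a compile-time LIBLINEAR_VERSION macro and a run-time liblinear_version symbol, a caller can verify that the header and the linked library match. A small illustrative sketch (not part of this commit):

#include <stdio.h>
#include "linear.h"

int main(void)
{
	/* Compile-time version of the header vs. run-time version of the library. */
	printf("built against liblinear %d, running with %d\n",
	       LIBLINEAR_VERSION, liblinear_version);
	if (LIBLINEAR_VERSION != liblinear_version)
		fprintf(stderr, "warning: header and library versions differ\n");
	return 0;
}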

liblinear/newton.cpp (new file, 251 lines)

@@ -0,0 +1,251 @@
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include "newton.h"
#ifndef min
template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
#endif
#ifndef max
template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
#endif
#ifdef __cplusplus
extern "C" {
#endif
extern double dnrm2_(int *, double *, int *);
extern double ddot_(int *, double *, int *, double *, int *);
extern int daxpy_(int *, double *, double *, int *, double *, int *);
extern int dscal_(int *, double *, double *, int *);
#ifdef __cplusplus
}
#endif
static void default_print(const char *buf)
{
fputs(buf,stdout);
fflush(stdout);
}
// On entry *f must be the function value of w
// On exit w is updated and *f is the new function value
double function::linesearch_and_update(double *w, double *s, double *f, double *g, double alpha)
{
double gTs = 0;
double eta = 0.01;
int n = get_nr_variable();
int max_num_linesearch = 20;
double *w_new = new double[n];
double fold = *f;
for (int i=0;i<n;i++)
gTs += s[i] * g[i];
int num_linesearch = 0;
for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
{
for (int i=0;i<n;i++)
w_new[i] = w[i] + alpha*s[i];
*f = fun(w_new);
if (*f - fold <= eta * alpha * gTs)
break;
else
alpha *= 0.5;
}
if (num_linesearch >= max_num_linesearch)
{
*f = fold;
return 0;
}
else
memcpy(w, w_new, sizeof(double)*n);
delete [] w_new;
return alpha;
}
void NEWTON::info(const char *fmt,...)
{
char buf[BUFSIZ];
va_list ap;
va_start(ap,fmt);
vsprintf(buf,fmt,ap);
va_end(ap);
(*newton_print_string)(buf);
}
NEWTON::NEWTON(const function *fun_obj, double eps, double eps_cg, int max_iter)
{
this->fun_obj=const_cast<function *>(fun_obj);
this->eps=eps;
this->eps_cg=eps_cg;
this->max_iter=max_iter;
newton_print_string = default_print;
}
NEWTON::~NEWTON()
{
}
void NEWTON::newton(double *w)
{
int n = fun_obj->get_nr_variable();
int i, cg_iter;
double step_size;
double f, fold, actred;
double init_step_size = 1;
int search = 1, iter = 1, inc = 1;
double *s = new double[n];
double *r = new double[n];
double *g = new double[n];
const double alpha_pcg = 0.01;
double *M = new double[n];
// calculate gradient norm at w=0 for stopping condition.
double *w0 = new double[n];
for (i=0; i<n; i++)
w0[i] = 0;
fun_obj->fun(w0);
fun_obj->grad(w0, g);
double gnorm0 = dnrm2_(&n, g, &inc);
delete [] w0;
f = fun_obj->fun(w);
fun_obj->grad(w, g);
double gnorm = dnrm2_(&n, g, &inc);
info("init f %5.3e |g| %5.3e\n", f, gnorm);
if (gnorm <= eps*gnorm0)
search = 0;
while (iter <= max_iter && search)
{
fun_obj->get_diag_preconditioner(M);
for(i=0; i<n; i++)
M[i] = (1-alpha_pcg) + alpha_pcg*M[i];
cg_iter = pcg(g, M, s, r);
fold = f;
step_size = fun_obj->linesearch_and_update(w, s, &f, g, init_step_size);
if (step_size == 0)
{
info("WARNING: line search fails\n");
break;
}
fun_obj->grad(w, g);
gnorm = dnrm2_(&n, g, &inc);
info("iter %2d f %5.3e |g| %5.3e CG %3d step_size %4.2e \n", iter, f, gnorm, cg_iter, step_size);
if (gnorm <= eps*gnorm0)
break;
if (f < -1.0e+32)
{
info("WARNING: f < -1.0e+32\n");
break;
}
actred = fold - f;
if (fabs(actred) <= 1.0e-12*fabs(f))
{
info("WARNING: actred too small\n");
break;
}
iter++;
}
if(iter >= max_iter)
info("\nWARNING: reaching max number of Newton iterations\n");
delete[] g;
delete[] r;
delete[] s;
delete[] M;
}
int NEWTON::pcg(double *g, double *M, double *s, double *r)
{
int i, inc = 1;
int n = fun_obj->get_nr_variable();
double one = 1;
double *d = new double[n];
double *Hd = new double[n];
double zTr, znewTrnew, alpha, beta, cgtol, dHd;
double *z = new double[n];
double Q = 0, newQ, Qdiff;
for (i=0; i<n; i++)
{
s[i] = 0;
r[i] = -g[i];
z[i] = r[i] / M[i];
d[i] = z[i];
}
zTr = ddot_(&n, z, &inc, r, &inc);
double gMinv_norm = sqrt(zTr);
cgtol = min(eps_cg, sqrt(gMinv_norm));
int cg_iter = 0;
int max_cg_iter = max(n, 5);
while (cg_iter < max_cg_iter)
{
cg_iter++;
fun_obj->Hv(d, Hd);
dHd = ddot_(&n, d, &inc, Hd, &inc);
// avoid 0/0 in getting alpha
if (dHd <= 1.0e-16)
break;
alpha = zTr/dHd;
daxpy_(&n, &alpha, d, &inc, s, &inc);
alpha = -alpha;
daxpy_(&n, &alpha, Hd, &inc, r, &inc);
// Using quadratic approximation as CG stopping criterion
newQ = -0.5*(ddot_(&n, s, &inc, r, &inc) - ddot_(&n, s, &inc, g, &inc));
Qdiff = newQ - Q;
if (newQ <= 0 && Qdiff <= 0)
{
if (cg_iter * Qdiff >= cgtol * newQ)
break;
}
else
{
info("WARNING: quadratic approximation > 0 or increasing in CG\n");
break;
}
Q = newQ;
for (i=0; i<n; i++)
z[i] = r[i] / M[i];
znewTrnew = ddot_(&n, z, &inc, r, &inc);
beta = znewTrnew/zTr;
dscal_(&n, &beta, d, &inc);
daxpy_(&n, &one, z, &inc, d, &inc);
zTr = znewTrnew;
}
if (cg_iter == max_cg_iter)
info("WARNING: reaching maximal number of CG steps\n");
delete[] d;
delete[] Hd;
delete[] z;
return cg_iter;
}
void NEWTON::set_print_string(void (*print_string) (const char *buf))
{
newton_print_string = print_string;
}

liblinear/newton.h (new file, 37 lines)

@@ -0,0 +1,37 @@
#ifndef _NEWTON_H
#define _NEWTON_H
class function
{
public:
virtual double fun(double *w) = 0 ;
virtual void grad(double *w, double *g) = 0 ;
virtual void Hv(double *s, double *Hs) = 0 ;
virtual int get_nr_variable(void) = 0 ;
virtual void get_diag_preconditioner(double *M) = 0 ;
virtual ~function(void){}
// base implementation in newton.cpp
virtual double linesearch_and_update(double *w, double *s, double *f, double *g, double alpha);
};
class NEWTON
{
public:
NEWTON(const function *fun_obj, double eps = 0.1, double eps_cg = 0.5, int max_iter = 1000);
~NEWTON();
void newton(double *w);
void set_print_string(void (*i_print) (const char *buf));
private:
int pcg(double *g, double *M, double *s, double *r);
double eps;
double eps_cg;
int max_iter;
function *fun_obj;
void info(const char *fmt,...);
void (*newton_print_string)(const char *buf);
};
#endif


@@ -5,6 +5,10 @@
 #include <errno.h>
 #include "linear.h"

+int print_null(const char *s,...) {return 0;}
+
+static int (*info)(const char *fmt,...) = &printf;
+
 struct feature_node *x;
 int max_nr_attr = 64;
@@ -38,10 +42,12 @@ static char* readline(FILE *input)
 	return line;
 }

-void do_predict(FILE *input, FILE *output, struct model* model_)
+void do_predict(FILE *input, FILE *output)
 {
 	int correct = 0;
 	int total = 0;
+	double error = 0;
+	double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;

 	int nr_class=get_nr_class(model_);
 	double *prob_estimates=NULL;
@@ -77,7 +83,7 @@ void do_predict(FILE *input, FILE *output, struct model* model_)
 	while(readline(input) != NULL)
 	{
 		int i = 0;
-		int target_label, predict_label;
+		double target_label, predict_label;
 		char *idx, *val, *label, *endptr;
 		int inst_max_index = 0; // strtol gives 0 if wrong format
@@ -85,7 +91,7 @@ void do_predict(FILE *input, FILE *output, struct model* model_)
 		if(label == NULL) // empty line
 			exit_input_error(total+1);

-		target_label = (int) strtol(label,&endptr,10);
+		target_label = strtod(label,&endptr);
 		if(endptr == label || *endptr != '\0')
 			exit_input_error(total+1);
@@ -131,7 +137,7 @@ void do_predict(FILE *input, FILE *output, struct model* model_)
 		{
 			int j;
 			predict_label = predict_probability(model_,x,prob_estimates);
-			fprintf(output,"%d",predict_label);
+			fprintf(output,"%g",predict_label);
 			for(j=0;j<model_->nr_class;j++)
 				fprintf(output," %g",prob_estimates[j]);
 			fprintf(output,"\n");
@@ -139,14 +145,29 @@ void do_predict(FILE *input, FILE *output, struct model* model_)
 		else
 		{
 			predict_label = predict(model_,x);
-			fprintf(output,"%d\n",predict_label);
+			fprintf(output,"%.17g\n",predict_label);
 		}

 		if(predict_label == target_label)
 			++correct;
+		error += (predict_label-target_label)*(predict_label-target_label);
+		sump += predict_label;
+		sumt += target_label;
+		sumpp += predict_label*predict_label;
+		sumtt += target_label*target_label;
+		sumpt += predict_label*target_label;
 		++total;
 	}
-	printf("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
+	if(check_regression_model(model_))
+	{
+		info("Mean squared error = %g (regression)\n",error/total);
+		info("Squared correlation coefficient = %g (regression)\n",
+			((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
+			((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
+			);
+	}
+	else
+		info("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
 	if(flag_predict_probability)
 		free(prob_estimates);
 }
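For reference (an editorial note, not part of the upstream source): with n = total predictions p_i against targets t_i, the two quantities printed above are the mean squared error and the squared Pearson correlation coefficient,

\mathrm{MSE} = \frac{1}{n}\sum_{i=1}^{n}(p_i - t_i)^2, \qquad
r^2 = \frac{\left(n\sum_i p_i t_i - \sum_i p_i \sum_i t_i\right)^2}
           {\left(n\sum_i p_i^2 - \left(\sum_i p_i\right)^2\right)\left(n\sum_i t_i^2 - \left(\sum_i t_i\right)^2\right)}

which is exactly what the sump/sumt/sumpp/sumtt/sumpt accumulators compute.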
@@ -156,7 +177,8 @@ void exit_with_help()
 	printf(
 	"Usage: predict [options] test_file model_file output_file\n"
 	"options:\n"
-	"-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0)\n"
+	"-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only\n"
+	"-q : quiet mode (no outputs)\n"
 	);
 	exit(1);
 }
@@ -176,7 +198,10 @@ int main(int argc, char **argv)
 			case 'b':
 				flag_predict_probability = atoi(argv[i]);
 				break;
+			case 'q':
+				info = &print_null;
+				i--;
+				break;
 			default:
 				fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]);
 				exit_with_help();
@@ -207,7 +232,7 @@ int main(int argc, char **argv)
 	}

 	x = (struct feature_node *) malloc(max_nr_attr*sizeof(struct feature_node));
-	do_predict(input, output, model_);
+	do_predict(input, output);
 	free_and_destroy_model(&model_);
 	free(line);
 	free(x);


@@ -16,28 +16,45 @@ void exit_with_help()
 	"Usage: train [options] training_set_file [model_file]\n"
 	"options:\n"
 	"-s type : set type of solver (default 1)\n"
+	"  for multi-class classification\n"
 	"	0 -- L2-regularized logistic regression (primal)\n"
 	"	1 -- L2-regularized L2-loss support vector classification (dual)\n"
 	"	2 -- L2-regularized L2-loss support vector classification (primal)\n"
 	"	3 -- L2-regularized L1-loss support vector classification (dual)\n"
-	"	4 -- multi-class support vector classification by Crammer and Singer\n"
+	"	4 -- support vector classification by Crammer and Singer\n"
 	"	5 -- L1-regularized L2-loss support vector classification\n"
 	"	6 -- L1-regularized logistic regression\n"
 	"	7 -- L2-regularized logistic regression (dual)\n"
+	"  for regression\n"
+	"	11 -- L2-regularized L2-loss support vector regression (primal)\n"
+	"	12 -- L2-regularized L2-loss support vector regression (dual)\n"
+	"	13 -- L2-regularized L1-loss support vector regression (dual)\n"
+	"  for outlier detection\n"
+	"	21 -- one-class support vector machine (dual)\n"
 	"-c cost : set the parameter C (default 1)\n"
+	"-p epsilon : set the epsilon in loss function of SVR (default 0.1)\n"
+	"-n nu : set the parameter nu of one-class SVM (default 0.5)\n"
 	"-e epsilon : set tolerance of termination criterion\n"
 	"	-s 0 and 2\n"
 	"		|f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n"
 	"		where f is the primal function and pos/neg are # of\n"
 	"		positive/negative data (default 0.01)\n"
-	"	-s 1, 3, 4 and 7\n"
-	"		Dual maximal violation <= eps; similar to libsvm (default 0.1)\n"
+	"	-s 11\n"
+	"		|f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.0001)\n"
+	"	-s 1, 3, 4, 7, and 21\n"
+	"		Dual maximal violation <= eps; similar to libsvm (default 0.1 except 0.01 for -s 21)\n"
 	"	-s 5 and 6\n"
 	"		|f'(w)|_1 <= eps*min(pos,neg)/l*|f'(w0)|_1,\n"
 	"		where f is the primal function (default 0.01)\n"
+	"	-s 12 and 13\n"
+	"		|f'(alpha)|_1 <= eps |f'(alpha0)|,\n"
+	"		where f is the dual function (default 0.1)\n"
 	"-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
+	"-R : not regularize the bias; must with -B 1 to have the bias; DON'T use this unless you know what it is\n"
+	"	(for -s 0, 2, 5, 6, 11)\n"
 	"-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
 	"-v n: n-fold cross validation mode\n"
+	"-C : find parameters (C for -s 0, 2 and C, p for -s 11)\n"
 	"-q : quiet mode (no outputs)\n"
 	);
 	exit(1);
@@ -73,12 +90,17 @@ static char* readline(FILE *input)
 void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name);
 void read_problem(const char *filename);
 void do_cross_validation();
+void do_find_parameters();

 struct feature_node *x_space;
 struct parameter param;
 struct problem prob;
 struct model* model_;
 int flag_cross_validation;
+int flag_find_parameters;
+int flag_C_specified;
+int flag_p_specified;
+int flag_solver_specified;
 int nr_fold;
 double bias;
@@ -94,11 +116,15 @@ int main(int argc, char **argv)
if(error_msg) if(error_msg)
{ {
fprintf(stderr,"Error: %s\n",error_msg); fprintf(stderr,"ERROR: %s\n",error_msg);
exit(1); exit(1);
} }
if(flag_cross_validation) if (flag_find_parameters)
{
do_find_parameters();
}
else if(flag_cross_validation)
{ {
do_cross_validation(); do_cross_validation();
} }
@@ -121,18 +147,63 @@ int main(int argc, char **argv)
return 0; return 0;
} }
void do_find_parameters()
{
double start_C, start_p, best_C, best_p, best_score;
if (flag_C_specified)
start_C = param.C;
else
start_C = -1.0;
if (flag_p_specified)
start_p = param.p;
else
start_p = -1.0;
printf("Doing parameter search with %d-fold cross validation.\n", nr_fold);
find_parameters(&prob, &param, nr_fold, start_C, start_p, &best_C, &best_p, &best_score);
if(param.solver_type == L2R_LR || param.solver_type == L2R_L2LOSS_SVC)
printf("Best C = %g CV accuracy = %g%%\n", best_C, 100.0*best_score);
else if(param.solver_type == L2R_L2LOSS_SVR)
printf("Best C = %g Best p = %g CV MSE = %g\n", best_C, best_p, best_score);
}
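do_find_parameters() is a thin wrapper over the library's find_parameters() routine; a negative start_C or start_p tells liblinear to choose its own starting point for the warm-start search. A minimal sketch of calling it directly from application code, assuming linear.h is included and prob/param have been filled in the way train.c fills them in:

	double best_C, best_p, best_score;
	/* -1 lets liblinear pick the starting C and p for the warm-start search */
	find_parameters(&prob, &param, 5, -1.0, -1.0, &best_C, &best_p, &best_score);
	if (param.solver_type == L2R_L2LOSS_SVR)
		printf("best C = %g  best p = %g  CV MSE = %g\n", best_C, best_p, best_score);
	else
		printf("best C = %g  CV accuracy = %g%%\n", best_C, 100.0*best_score);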
void do_cross_validation() void do_cross_validation()
{ {
int i; int i;
int total_correct = 0; int total_correct = 0;
int *target = Malloc(int, prob.l); double total_error = 0;
double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
double *target = Malloc(double, prob.l);
cross_validation(&prob,&param,nr_fold,target); cross_validation(&prob,&param,nr_fold,target);
if(param.solver_type == L2R_L2LOSS_SVR ||
param.solver_type == L2R_L1LOSS_SVR_DUAL ||
param.solver_type == L2R_L2LOSS_SVR_DUAL)
{
for(i=0;i<prob.l;i++)
{
double y = prob.y[i];
double v = target[i];
total_error += (v-y)*(v-y);
sumv += v;
sumy += y;
sumvv += v*v;
sumyy += y*y;
sumvy += v*y;
}
printf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
printf("Cross Validation Squared correlation coefficient = %g\n",
((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))
);
}
else
{
for(i=0;i<prob.l;i++) for(i=0;i<prob.l;i++)
if(target[i] == prob.y[i]) if(target[i] == prob.y[i])
++total_correct; ++total_correct;
printf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l); printf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
}
free(target); free(target);
} }
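For the regression solvers the loop above accumulates the running sums needed for two summary statistics over the l cross-validation predictions v_i against the true targets y_i:

	\mathrm{MSE} = \frac{1}{l}\sum_{i=1}^{l}(v_i - y_i)^2

	r^2 = \frac{\bigl(l\sum_i v_i y_i - \sum_i v_i \sum_i y_i\bigr)^2}
	           {\bigl(l\sum_i v_i^2 - (\sum_i v_i)^2\bigr)\,\bigl(l\sum_i y_i^2 - (\sum_i y_i)^2\bigr)}

which are the "Mean squared error" and "Squared correlation coefficient" printed by the regression branch of do_cross_validation().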
@@ -145,11 +216,19 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
// default values // default values
param.solver_type = L2R_L2LOSS_SVC_DUAL; param.solver_type = L2R_L2LOSS_SVC_DUAL;
param.C = 1; param.C = 1;
param.p = 0.1;
param.nu = 0.5;
param.eps = INF; // see setting below param.eps = INF; // see setting below
param.nr_weight = 0; param.nr_weight = 0;
param.regularize_bias = 1;
param.weight_label = NULL; param.weight_label = NULL;
param.weight = NULL; param.weight = NULL;
param.init_sol = NULL;
flag_cross_validation = 0; flag_cross_validation = 0;
flag_C_specified = 0;
flag_p_specified = 0;
flag_solver_specified = 0;
flag_find_parameters = 0;
bias = -1; bias = -1;
// parse options // parse options
@@ -162,10 +241,21 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
{ {
case 's': case 's':
param.solver_type = atoi(argv[i]); param.solver_type = atoi(argv[i]);
flag_solver_specified = 1;
break; break;
case 'c': case 'c':
param.C = atof(argv[i]); param.C = atof(argv[i]);
flag_C_specified = 1;
break;
case 'p':
flag_p_specified = 1;
param.p = atof(argv[i]);
break;
case 'n':
param.nu = atof(argv[i]);
break; break;
case 'e': case 'e':
@@ -199,6 +289,16 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
i--; i--;
break; break;
case 'C':
flag_find_parameters = 1;
i--;
break;
case 'R':
param.regularize_bias = 0;
i--;
break;
default: default:
fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]); fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]);
exit_with_help(); exit_with_help();
@@ -226,14 +326,52 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
sprintf(model_file_name,"%s.model",p); sprintf(model_file_name,"%s.model",p);
} }
// default solver for parameter selection is L2R_L2LOSS_SVC
if(flag_find_parameters)
{
if(!flag_cross_validation)
nr_fold = 5;
if(!flag_solver_specified)
{
fprintf(stderr, "Solver not specified. Using -s 2\n");
param.solver_type = L2R_L2LOSS_SVC;
}
else if(param.solver_type != L2R_LR && param.solver_type != L2R_L2LOSS_SVC && param.solver_type != L2R_L2LOSS_SVR)
{
fprintf(stderr, "Warm-start parameter search only available for -s 0, -s 2 and -s 11\n");
exit_with_help();
}
}
if(param.eps == INF) if(param.eps == INF)
{ {
if(param.solver_type == L2R_LR || param.solver_type == L2R_L2LOSS_SVC) switch(param.solver_type)
{
case L2R_LR:
case L2R_L2LOSS_SVC:
param.eps = 0.01; param.eps = 0.01;
else if(param.solver_type == L2R_L2LOSS_SVC_DUAL || param.solver_type == L2R_L1LOSS_SVC_DUAL || param.solver_type == MCSVM_CS || param.solver_type == L2R_LR_DUAL) break;
case L2R_L2LOSS_SVR:
param.eps = 0.0001;
break;
case L2R_L2LOSS_SVC_DUAL:
case L2R_L1LOSS_SVC_DUAL:
case MCSVM_CS:
case L2R_LR_DUAL:
param.eps = 0.1; param.eps = 0.1;
else if(param.solver_type == L1R_L2LOSS_SVC || param.solver_type == L1R_LR) break;
case L1R_L2LOSS_SVC:
case L1R_LR:
param.eps = 0.01; param.eps = 0.01;
break;
case L2R_L1LOSS_SVR_DUAL:
case L2R_L2LOSS_SVR_DUAL:
param.eps = 0.1;
break;
case ONECLASS_SVM:
param.eps = 0.01;
break;
}
} }
} }
@@ -241,7 +379,7 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
void read_problem(const char *filename) void read_problem(const char *filename)
{ {
int max_index, inst_max_index, i; int max_index, inst_max_index, i;
long int elements, j; size_t elements, j;
FILE *fp = fopen(filename,"r"); FILE *fp = fopen(filename,"r");
char *endptr; char *endptr;
char *idx, *val, *label; char *idx, *val, *label;
@@ -275,7 +413,7 @@ void read_problem(const char *filename)
prob.bias=bias; prob.bias=bias;
prob.y = Malloc(int,prob.l); prob.y = Malloc(double,prob.l);
prob.x = Malloc(struct feature_node *,prob.l); prob.x = Malloc(struct feature_node *,prob.l);
x_space = Malloc(struct feature_node,elements+prob.l); x_space = Malloc(struct feature_node,elements+prob.l);
@@ -290,7 +428,7 @@ void read_problem(const char *filename)
if(label == NULL) // empty line if(label == NULL) // empty line
exit_input_error(i+1); exit_input_error(i+1);
prob.y[i] = (int) strtol(label,&endptr,10); prob.y[i] = strtod(label,&endptr);
if(endptr == label || *endptr != '\0') if(endptr == label || *endptr != '\0')
exit_input_error(i+1); exit_input_error(i+1);
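Switching the label parse from strtol() to strtod() (and prob.y from int to double) is what allows real-valued regression targets. In the usual LIBSVM text format a training line such as

	2.75 1:0.43 7:-1.2 15:0.08

is now accepted as-is; the old integer-only path would have rejected it as a format error because of the trailing ".75".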

View File

@@ -1,235 +0,0 @@
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include "tron.h"
#ifndef min
template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
#endif
#ifndef max
template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
#endif
#ifdef __cplusplus
extern "C" {
#endif
extern double dnrm2_(int *, double *, int *);
extern double ddot_(int *, double *, int *, double *, int *);
extern int daxpy_(int *, double *, double *, int *, double *, int *);
extern int dscal_(int *, double *, double *, int *);
#ifdef __cplusplus
}
#endif
static void default_print(const char *buf)
{
fputs(buf,stdout);
fflush(stdout);
}
void TRON::info(const char *fmt,...)
{
char buf[BUFSIZ];
va_list ap;
va_start(ap,fmt);
vsprintf(buf,fmt,ap);
va_end(ap);
(*tron_print_string)(buf);
}
TRON::TRON(const function *fun_obj, double eps, int max_iter)
{
this->fun_obj=const_cast<function *>(fun_obj);
this->eps=eps;
this->max_iter=max_iter;
tron_print_string = default_print;
}
TRON::~TRON()
{
}
void TRON::tron(double *w)
{
// Parameters for updating the iterates.
double eta0 = 1e-4, eta1 = 0.25, eta2 = 0.75;
// Parameters for updating the trust region size delta.
double sigma1 = 0.25, sigma2 = 0.5, sigma3 = 4;
int n = fun_obj->get_nr_variable();
int i, cg_iter;
double delta, snorm, one=1.0;
double alpha, f, fnew, prered, actred, gs;
int search = 1, iter = 1, inc = 1;
double *s = new double[n];
double *r = new double[n];
double *w_new = new double[n];
double *g = new double[n];
for (i=0; i<n; i++)
w[i] = 0;
f = fun_obj->fun(w);
fun_obj->grad(w, g);
delta = dnrm2_(&n, g, &inc);
double gnorm1 = delta;
double gnorm = gnorm1;
if (gnorm <= eps*gnorm1)
search = 0;
iter = 1;
while (iter <= max_iter && search)
{
cg_iter = trcg(delta, g, s, r);
memcpy(w_new, w, sizeof(double)*n);
daxpy_(&n, &one, s, &inc, w_new, &inc);
gs = ddot_(&n, g, &inc, s, &inc);
prered = -0.5*(gs-ddot_(&n, s, &inc, r, &inc));
fnew = fun_obj->fun(w_new);
// Compute the actual reduction.
actred = f - fnew;
// On the first iteration, adjust the initial step bound.
snorm = dnrm2_(&n, s, &inc);
if (iter == 1)
delta = min(delta, snorm);
// Compute prediction alpha*snorm of the step.
if (fnew - f - gs <= 0)
alpha = sigma3;
else
alpha = max(sigma1, -0.5*(gs/(fnew - f - gs)));
// Update the trust region bound according to the ratio of actual to predicted reduction.
if (actred < eta0*prered)
delta = min(max(alpha, sigma1)*snorm, sigma2*delta);
else if (actred < eta1*prered)
delta = max(sigma1*delta, min(alpha*snorm, sigma2*delta));
else if (actred < eta2*prered)
delta = max(sigma1*delta, min(alpha*snorm, sigma3*delta));
else
delta = max(delta, min(alpha*snorm, sigma3*delta));
info("iter %2d act %5.3e pre %5.3e delta %5.3e f %5.3e |g| %5.3e CG %3d\n", iter, actred, prered, delta, f, gnorm, cg_iter);
if (actred > eta0*prered)
{
iter++;
memcpy(w, w_new, sizeof(double)*n);
f = fnew;
fun_obj->grad(w, g);
gnorm = dnrm2_(&n, g, &inc);
if (gnorm <= eps*gnorm1)
break;
}
if (f < -1.0e+32)
{
info("warning: f < -1.0e+32\n");
break;
}
if (fabs(actred) <= 0 && prered <= 0)
{
info("warning: actred and prered <= 0\n");
break;
}
if (fabs(actred) <= 1.0e-12*fabs(f) &&
fabs(prered) <= 1.0e-12*fabs(f))
{
info("warning: actred and prered too small\n");
break;
}
}
delete[] g;
delete[] r;
delete[] w_new;
delete[] s;
}
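TRON::tron() above is the standard trust-region Newton loop: with actred = f(w) - f(w+s) and prered = -(g^T s - s^T r)/2 as the actual and predicted reductions, the step s is accepted only when actred > eta0*prered, and the radius is updated as

	\Delta \leftarrow
	\begin{cases}
	\min(\max(\alpha,\sigma_1)\,\|s\|,\ \sigma_2\Delta), & \text{actred} < \eta_0\,\text{prered} \\
	\max(\sigma_1\Delta,\ \min(\alpha\|s\|,\ \sigma_2\Delta)), & \eta_0\,\text{prered} \le \text{actred} < \eta_1\,\text{prered} \\
	\max(\sigma_1\Delta,\ \min(\alpha\|s\|,\ \sigma_3\Delta)), & \eta_1\,\text{prered} \le \text{actred} < \eta_2\,\text{prered} \\
	\max(\Delta,\ \min(\alpha\|s\|,\ \sigma_3\Delta)), & \text{otherwise}
	\end{cases}

with \eta_0 = 10^{-4}, \eta_1 = 0.25, \eta_2 = 0.75, \sigma_1 = 0.25, \sigma_2 = 0.5, \sigma_3 = 4, and \alpha the step-length estimate from the quadratic interpolation a few lines earlier.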
int TRON::trcg(double delta, double *g, double *s, double *r)
{
int i, inc = 1;
int n = fun_obj->get_nr_variable();
double one = 1;
double *d = new double[n];
double *Hd = new double[n];
double rTr, rnewTrnew, alpha, beta, cgtol;
for (i=0; i<n; i++)
{
s[i] = 0;
r[i] = -g[i];
d[i] = r[i];
}
cgtol = 0.1*dnrm2_(&n, g, &inc);
int cg_iter = 0;
rTr = ddot_(&n, r, &inc, r, &inc);
while (1)
{
if (dnrm2_(&n, r, &inc) <= cgtol)
break;
cg_iter++;
fun_obj->Hv(d, Hd);
alpha = rTr/ddot_(&n, d, &inc, Hd, &inc);
daxpy_(&n, &alpha, d, &inc, s, &inc);
if (dnrm2_(&n, s, &inc) > delta)
{
info("cg reaches trust region boundary\n");
alpha = -alpha;
daxpy_(&n, &alpha, d, &inc, s, &inc);
double std = ddot_(&n, s, &inc, d, &inc);
double sts = ddot_(&n, s, &inc, s, &inc);
double dtd = ddot_(&n, d, &inc, d, &inc);
double dsq = delta*delta;
double rad = sqrt(std*std + dtd*(dsq-sts));
if (std >= 0)
alpha = (dsq - sts)/(std + rad);
else
alpha = (rad - std)/dtd;
daxpy_(&n, &alpha, d, &inc, s, &inc);
alpha = -alpha;
daxpy_(&n, &alpha, Hd, &inc, r, &inc);
break;
}
alpha = -alpha;
daxpy_(&n, &alpha, Hd, &inc, r, &inc);
rnewTrnew = ddot_(&n, r, &inc, r, &inc);
beta = rnewTrnew/rTr;
dscal_(&n, &beta, d, &inc);
daxpy_(&n, &one, r, &inc, d, &inc);
rTr = rnewTrnew;
}
delete[] d;
delete[] Hd;
return(cg_iter);
}
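When the conjugate-gradient iterate would leave the trust region, trcg() backs the step up and advances to the boundary instead: with std = s^T d, sts = s^T s and dtd = d^T d, the non-negative root of

	\|s + \alpha d\|^2 = \Delta^2
	\;\Longleftrightarrow\;
	dtd\,\alpha^2 + 2\,std\,\alpha + (sts - \Delta^2) = 0

is \alpha = \bigl(-std + \sqrt{std^2 + dtd\,(\Delta^2 - sts)}\bigr)/dtd; the two branches on the sign of std in the code evaluate the same root in the numerically stable order, avoiding cancellation between -std and the square root.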
double TRON::norm_inf(int n, double *x)
{
double dmax = fabs(x[0]);
for (int i=1; i<n; i++)
if (fabs(x[i]) >= dmax)
dmax = fabs(x[i]);
return(dmax);
}
void TRON::set_print_string(void (*print_string) (const char *buf))
{
tron_print_string = print_string;
}

View File

@@ -1,34 +0,0 @@
#ifndef _TRON_H
#define _TRON_H
class function
{
public:
virtual double fun(double *w) = 0 ;
virtual void grad(double *w, double *g) = 0 ;
virtual void Hv(double *s, double *Hs) = 0 ;
virtual int get_nr_variable(void) = 0 ;
virtual ~function(void){}
};
class TRON
{
public:
TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000);
~TRON();
void tron(double *w);
void set_print_string(void (*i_print) (const char *buf));
private:
int trcg(double delta, double *g, double *s, double *r);
double norm_inf(int n, double *x);
double eps;
int max_iter;
function *fun_obj;
void info(const char *fmt,...);
void (*tron_print_string)(const char *buf);
};
#endif
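A minimal sketch (not part of this commit) of driving the interface declared above: a hypothetical Quadratic subclass of function minimizes f(w) = ||w - a||^2 / 2, whose gradient is w - a and whose Hessian is the identity, so TRON should converge to w = a.

	#include <cstdio>
	#include "tron.h"

	class Quadratic : public function
	{
	public:
		Quadratic(int n, const double *a) : n(n), a(a) {}
		double fun(double *w)
		{
			double f = 0;
			for (int i = 0; i < n; i++)
				f += 0.5*(w[i]-a[i])*(w[i]-a[i]);
			return f;
		}
		void grad(double *w, double *g)
		{
			for (int i = 0; i < n; i++)
				g[i] = w[i] - a[i];      // gradient of the quadratic
		}
		void Hv(double *s, double *Hs)
		{
			for (int i = 0; i < n; i++)
				Hs[i] = s[i];            // Hessian is the identity
		}
		int get_nr_variable(void) { return n; }
	private:
		int n;
		const double *a;
	};

	int main()
	{
		double a[2] = {1.0, -2.0};
		double w[2];                     // TRON::tron() zero-initializes w itself
		Quadratic q(2, a);
		TRON solver(&q, 0.01, 100);
		solver.tron(w);                  // iterates until |g| <= eps*|g0|
		printf("w = (%g, %g)\n", w[0], w[1]);
		return 0;
	}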