diff --git a/CHANGELOG b/CHANGELOG index db4bf8fb0..758da2167 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,7 +1,7 @@ #Nmap Changelog ($Id$); -*-text-*- o Upgrade included libraries: Lua 5.4.6, libpcre2 10.43, zlib 1.3.1, - libssh2 1.11.0 + libssh2 1.11.0, liblinear 2.47 o [Zenmap][GH#2739] Fix a crash in Zenmap when changing a host comment. diff --git a/checklibs.sh b/checklibs.sh index 1b7708bf0..c24835418 100644 --- a/checklibs.sh +++ b/checklibs.sh @@ -69,9 +69,16 @@ EOC check_liblinear() { LINEAR_SOURCE="https://www.csie.ntu.edu.tw/~cjlin/liblinear/" - echo "Can't check liblinear, no version information is available" - LINEAR_LATEST=$(curl -Ls $LINEAR_SOURCE | perl -lne 'if(/The current release \(([^)]+)\) of LIBLINEAR/){print $1;exit 0}') - echo " Latest:" $LINEAR_LATEST + LINEAR_VERSION=$(awk '$2=="LIBLINEAR_VERSION"{print$3;exit}' $NDIR/liblinear/linear.h | sed 's/./&./1') + LINEAR_LATEST=$(curl -Ls $LINEAR_SOURCE | perl -lne 'if(/liblinear-([\d.]+).tar.gz/){print $1}' | newest) + if [ "$LINEAR_VERSION" != "$LINEAR_LATEST" ]; then + echo "Newer version of liblinear available" + echo " Current:" $LINEAR_VERSION + echo " Latest: " $LINEAR_LATEST + echo " Source: $LINEAR_SOURCE" + else + echo "liblinear: $LINEAR_VERSION" + fi } check_zlib() { diff --git a/liblinear/COPYRIGHT b/liblinear/COPYRIGHT index 71c0ad7f9..4e09b34da 100644 --- a/liblinear/COPYRIGHT +++ b/liblinear/COPYRIGHT @@ -1,5 +1,5 @@ -Copyright (c) 2007-2011 The LIBLINEAR Project. +Copyright (c) 2007-2023 The LIBLINEAR Project. All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/liblinear/Makefile b/liblinear/Makefile index 40af81a67..065571e85 100644 --- a/liblinear/Makefile +++ b/liblinear/Makefile @@ -2,35 +2,42 @@ CXX ?= g++ CC ?= gcc CFLAGS = -Wall -Wconversion -O3 -fPIC LIBS = blas/blas.a -SHVER = 1 +#LIBS = -lblas +SHVER = 5 AR = ar RANLIB = ranlib -#LIBS = -lblas +OS = $(shell uname) +ifeq ($(OS),Darwin) + SHARED_LIB_FLAG = -dynamiclib -Wl,-install_name,liblinear.so.$(SHVER) +else + SHARED_LIB_FLAG = -shared -Wl,-soname,liblinear.so.$(SHVER) +endif all: train predict -lib: linear.o tron.o blas/blas.a - $(CXX) -shared -dynamiclib linear.o tron.o blas/blas.a -o liblinear.so.$(SHVER) +lib: linear.o newton.o blas/blas.a + $(CXX) $(SHARED_LIB_FLAG) linear.o newton.o blas/blas.a -o liblinear.so.$(SHVER) -liblinear.a: linear.o tron.o blas/blas.a - $(AR) rcv liblinear.a linear.o tron.o blas/*.o +liblinear.a: linear.o newton.o blas/blas.a + $(AR) rcv liblinear.a linear.o newton.o blas/*.o $(RANLIB) liblinear.a -train: tron.o linear.o train.c blas/blas.a - $(CXX) $(CFLAGS) -o train train.c tron.o linear.o $(LIBS) +train: newton.o linear.o train.c blas/blas.a + $(CXX) $(CFLAGS) -o train train.c newton.o linear.o $(LIBS) -predict: tron.o linear.o predict.c blas/blas.a - $(CXX) $(CFLAGS) -o predict predict.c tron.o linear.o $(LIBS) +predict: newton.o linear.o predict.c blas/blas.a + $(CXX) $(CFLAGS) -o predict predict.c newton.o linear.o $(LIBS) -tron.o: tron.cpp tron.h - $(CXX) $(CFLAGS) -c -o tron.o tron.cpp +newton.o: newton.cpp newton.h + $(CXX) $(CFLAGS) -c -o newton.o newton.cpp linear.o: linear.cpp linear.h $(CXX) $(CFLAGS) -c -o linear.o linear.cpp -blas/blas.a: - cd blas; $(MAKE) OPTFLAGS='$(CFLAGS)' CC='$(CC)'; +blas/blas.a: blas/*.c blas/*.h + make -C blas OPTFLAGS='$(CFLAGS)' CC='$(CC)'; clean: - cd blas; $(MAKE) clean - rm -f *~ tron.o linear.o train predict liblinear.so.$(SHVER) liblinear.a + make -C blas clean + make -C matlab clean + rm -f *~ newton.o linear.o 
train predict liblinear.so.$(SHVER) diff --git a/liblinear/Makefile.win b/liblinear/Makefile.win index c5c721fae..1b199701c 100644 --- a/liblinear/Makefile.win +++ b/liblinear/Makefile.win @@ -1,30 +1,24 @@ -#You must ensure nmake.exe, cl.exe, link.exe are in system path. -#VCVARS32.bat -#Under dosbox prompt -#nmake -f Makefile.win - -########################################## -CXXC = cl.exe -CFLAGS = -nologo -O2 -EHsc -I. -D __WIN32__ -D _CRT_SECURE_NO_DEPRECATE +CXX = cl.exe +CFLAGS = /nologo /O2 /EHsc /I. /D _WIN64 /D _CRT_SECURE_NO_DEPRECATE TARGET = windows -all: $(TARGET)\train.exe $(TARGET)\predict.exe +all: $(TARGET)\train.exe $(TARGET)\predict.exe lib -$(TARGET)\train.exe: tron.obj linear.obj train.c blas\*.c - $(CXX) $(CFLAGS) -Fe$(TARGET)\train.exe tron.obj linear.obj train.c blas\*.c +$(TARGET)\train.exe: newton.obj linear.obj train.c blas\*.c + $(CXX) $(CFLAGS) -Fe$(TARGET)\train.exe newton.obj linear.obj train.c blas\*.c -$(TARGET)\predict.exe: tron.obj linear.obj predict.c blas\*.c - $(CXX) $(CFLAGS) -Fe$(TARGET)\predict.exe tron.obj linear.obj predict.c blas\*.c +$(TARGET)\predict.exe: newton.obj linear.obj predict.c blas\*.c + $(CXX) $(CFLAGS) -Fe$(TARGET)\predict.exe newton.obj linear.obj predict.c blas\*.c linear.obj: linear.cpp linear.h $(CXX) $(CFLAGS) -c linear.cpp -tron.obj: tron.cpp tron.h - $(CXX) $(CFLAGS) -c tron.cpp +newton.obj: newton.cpp newton.h + $(CXX) $(CFLAGS) -c newton.cpp -lib: linear.cpp linear.h linear.def tron.obj - $(CXX) $(CFLAGS) -LD linear.cpp tron.obj blas\*.c -Fe$(TARGET)\liblinear -link -DEF:linear.def +lib: linear.cpp linear.h linear.def newton.obj + $(CXX) $(CFLAGS) -LD linear.cpp newton.obj blas\*.c -Fe$(TARGET)\liblinear -link -DEF:linear.def clean: - -erase /Q *.obj $(TARGET)\. + -erase /Q *.obj $(TARGET)\*.exe $(TARGET)\*.dll $(TARGET)\*.exp $(TARGET)\*.lib diff --git a/liblinear/README b/liblinear/README index 4e45c2e35..0c4338542 100644 --- a/liblinear/README +++ b/liblinear/README @@ -1,8 +1,10 @@ -LIBLINEAR is a simple package for solving large-scale regularized -linear classification. It currently supports L2-regularized logistic -regression/L2-loss support vector classification/L1-loss support vector -classification, and L1-regularized L2-loss support vector classification/ -logistic regression. This document explains the usage of LIBLINEAR. +LIBLINEAR is a simple package for solving large-scale regularized linear +classification, regression and outlier detection. It currently supports +- L2-regularized logistic regression/L2-loss support vector classification/L1-loss support vector classification +- L1-regularized L2-loss support vector classification/L1-regularized logistic regression +- L2-regularized L2-loss support vector regression/L1-loss support vector regression +- one-class support vector machine. +This document explains the usage of LIBLINEAR. To get started, please read the ``Quick Start'' section first. For developers, please check the ``Library Usage'' section to learn @@ -16,20 +18,21 @@ Table of Contents - Installation - `train' Usage - `predict' Usage +- `svm-scale' Usage - Examples - Library Usage - Building Windows Binaries -- Additional Information - MATLAB/OCTAVE interface -- PYTHON interface +- Python Interface +- Additional Information When to use LIBLINEAR but not LIBSVM ==================================== There are some large data for which with/without nonlinear mappings gives similar performances. Without using kernels, one can -efficiently train a much larger set via a linear classifier. 
These -data usually have a large number of features. Document classification +efficiently train a much larger set via linear classification/regression. +These data usually have a large number of features. Document classification is an example. Warning: While generally liblinear is very fast, its default solver @@ -74,8 +77,8 @@ sparse data, use `-l 0' to keep the sparsity. Installation ============ -On Unix systems, type `make' to build the `train' and `predict' -programs. Run them without arguments to show the usages. +On Unix systems, type `make' to build the `train', `predict', +and `svm-scale' programs. Run them without arguments to show the usages. On other systems, consult `Makefile' to build them (e.g., see 'Building Windows binaries' in this file) or use the pre-built @@ -85,11 +88,13 @@ This software uses some level-1 BLAS subroutines. The needed functions are included in this package. If a BLAS library is available on your machine, you may use it by modifying the Makefile: Unmark the following line - #LIBS ?= -lblas + #LIBS = -lblas and mark - LIBS ?= blas/blas.a + LIBS = blas/blas.a + +The tool `svm-scale', borrowed from LIBSVM, is for scaling input data file. `train' Usage ============= @@ -97,33 +102,55 @@ and mark Usage: train [options] training_set_file [model_file] options: -s type : set type of solver (default 1) - 0 -- L2-regularized logistic regression (primal) - 1 -- L2-regularized L2-loss support vector classification (dual) - 2 -- L2-regularized L2-loss support vector classification (primal) - 3 -- L2-regularized L1-loss support vector classification (dual) - 4 -- multi-class support vector classification by Crammer and Singer - 5 -- L1-regularized L2-loss support vector classification - 6 -- L1-regularized logistic regression - 7 -- L2-regularized logistic regression (dual) + for multi-class classification + 0 -- L2-regularized logistic regression (primal) + 1 -- L2-regularized L2-loss support vector classification (dual) + 2 -- L2-regularized L2-loss support vector classification (primal) + 3 -- L2-regularized L1-loss support vector classification (dual) + 4 -- support vector classification by Crammer and Singer + 5 -- L1-regularized L2-loss support vector classification + 6 -- L1-regularized logistic regression + 7 -- L2-regularized logistic regression (dual) + for regression + 11 -- L2-regularized L2-loss support vector regression (primal) + 12 -- L2-regularized L2-loss support vector regression (dual) + 13 -- L2-regularized L1-loss support vector regression (dual) + for outlier detection + 21 -- one-class support vector machine (dual) -c cost : set the parameter C (default 1) +-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1) +-n nu : set the parameter nu of one-class SVM (default 0.5) -e epsilon : set tolerance of termination criterion -s 0 and 2 |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2, where f is the primal function and pos/neg are # of positive/negative data (default 0.01) - -s 1, 3, 4 and 7 - Dual maximal violation <= eps; similar to libsvm (default 0.1) + -s 11 + |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.0001) + -s 1, 3, 4, 7, and 21 + Dual maximal violation <= eps; similar to libsvm (default 0.1 except 0.01 for -s 21) -s 5 and 6 - |f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf, + |f'(w)|_1 <= eps*min(pos,neg)/l*|f'(w0)|_1, where f is the primal function (default 0.01) + -s 12 and 13 + |f'(alpha)|_1 <= eps |f'(alpha0)|, + where f is the dual function (default 0.1) -B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias 
term added (default -1) +-R : not regularize the bias; must with -B 1 to have the bias; DON'T use this unless you know what it is + (for -s 0, 2, 5, 6, 11) -wi weight: weights adjust the parameter C of different classes (see README for details) -v n: n-fold cross validation mode +-C : find parameters (C for -s 0, 2 and C, p for -s 11) -q : quiet mode (no outputs) Option -v randomly splits the data into n parts and calculates cross validation accuracy on them. +Option -C conducts cross validation under different parameters and finds +the best one. This option is supported only by -s 0, -s 2 (for finding +C) and -s 11 (for finding C, p). If the solver is not specified, -s 2 +is used. + Formulations: For L2-regularized logistic regression (-s 0), we solve @@ -152,23 +179,57 @@ For L1-regularized logistic regression (-s 6), we solve min_w \sum |w_j| + C \sum log(1 + exp(-y_i w^Tx_i)) -where - -Q is a matrix with Q_ij = y_i y_j x_i^T x_j. - For L2-regularized logistic regression (-s 7), we solve min_alpha 0.5(alpha^T Q alpha) + \sum alpha_i*log(alpha_i) + \sum (C-alpha_i)*log(C-alpha_i) - a constant s.t. 0 <= alpha_i <= C, -If bias >= 0, w becomes [w; w_{n+1}] and x becomes [x; bias]. +where + +Q is a matrix with Q_ij = y_i y_j x_i^T x_j. + +For L2-regularized L2-loss SVR (-s 11), we solve + +min_w w^Tw/2 + C \sum max(0, |y_i-w^Tx_i|-epsilon)^2 + +For L2-regularized L2-loss SVR dual (-s 12), we solve + +min_beta 0.5(beta^T (Q + lambda I/2/C) beta) - y^T beta + \sum |beta_i| + +For L2-regularized L1-loss SVR dual (-s 13), we solve + +min_beta 0.5(beta^T Q beta) - y^T beta + \sum |beta_i| + s.t. -C <= beta_i <= C, + +where + +Q is a matrix with Q_ij = x_i^T x_j. + +For one-class SVM dual (-s 21), we solve + +min_alpha 0.5(alpha^T Q alpha) + s.t. 0 <= alpha_i <= 1 and \sum alpha_i = nu*l, + +where + +Q is a matrix with Q_ij = x_i^T x_j. + +If bias >= 0, w becomes [w; w_{n+1}] and x becomes [x; bias]. For +example, L2-regularized logistic regression (-s 0) becomes + +min_w w^Tw/2 + (w_{n+1})^2/2 + C \sum log(1 + exp(-y_i [w; w_{n+1}]^T[x_i; bias])) + +Some may prefer not having (w_{n+1})^2/2 (i.e., bias variable not +regularized). For primal solvers (-s 0, 2, 5, 6, 11), we provide an +option -R to remove (w_{n+1})^2/2. However, -R is generally not needed +as for most data with/without (w_{n+1})^2/2 give similar performances. The primal-dual relationship implies that -s 1 and -s 2 give the same -model, and -s 0 and -s 7 give the same. +model, -s 0 and -s 7 give the same, and -s 11 and -s 12 give the same. -We implement 1-vs-the rest multi-class strategy. In training i -vs. non_i, their C parameters are (weight from -wi)*C and C, -respectively. If there are only two classes, we train only one +We implement 1-vs-the rest multi-class strategy for classification. +In training i vs. non_i, their C parameters are (weight from -wi)*C +and C, respectively. If there are only two classes, we train only one model. Thus weight1*C vs. weight2*C is used. See examples below. We also implement multi-class SVM by Crammer and Singer (-s 4): @@ -193,7 +254,16 @@ and C^m_i = C if m = y_i, Usage: predict [options] test_file model_file output_file options: --b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0) +-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only +-q : quiet mode (no outputs) + +Note that -b is only needed in the prediction phase. This is different +from the setting of LIBSVM. 
+ +`svm-scale' Usage +================= + +See LIBSVM README. Examples ======== @@ -206,12 +276,38 @@ Train linear SVM with L2-loss function. Train a logistic regression model. +> train -s 21 -n 0.1 data_file + +Train a linear one-class SVM which selects roughly 10% data as outliers. + > train -v 5 -e 0.001 data_file -Do five-fold cross-validation using L2-loss svm. +Do five-fold cross-validation using L2-loss SVM. Use a smaller stopping tolerance 0.001 than the default 0.1 if you want more accurate solutions. +> train -C data_file +... +Best C = 0.000488281 CV accuracy = 83.3333% +> train -c 0.000488281 data_file + +Conduct cross validation many times by L2-loss SVM and find the +parameter C which achieves the best cross validation accuracy. Then +use the selected C to train the data for getting a model. + +> train -C -s 0 -v 3 -c 0.5 -e 0.0001 data_file + +For parameter selection by -C, users can specify other +solvers (currently -s 0, -s 2 and -s 11 are supported) and +different number of CV folds. Further, users can use +the -c option to specify the smallest C value of the +search range. This option is useful when users want to +rerun the parameter selection procedure from a specified +C under a different setting, such as a stricter stopping +tolerance -e 0.0001 in the above example. Similarly, for +-s 11, users can use the -p option to specify the +maximal p value of the search range. + > train -c 10 -w1 2 -w2 5 -w3 2 four_class_data_file Train four classifiers: @@ -233,18 +329,24 @@ Output probability estimates (for logistic regression only). Library Usage ============= +These functions and structures are declared in the header file `linear.h'. +You can see `train.c' and `predict.c' for examples showing how to use them. +We define LIBLINEAR_VERSION and declare `extern int liblinear_version; ' +in linear.h, so you can check the version number. + - Function: model* train(const struct problem *prob, const struct parameter *param); - This function constructs and returns a linear classification model - according to the given training data and parameters. + This function constructs and returns a linear classification + or regression model according to the given training data and + parameters. struct problem describes the problem: struct problem { int l, n; - int *y; + double *y; struct feature_node **x; double bias; }; @@ -252,10 +354,10 @@ Library Usage where `l' is the number of training data. If bias >= 0, we assume that one additional feature is added to the end of each data instance. `n' is the number of feature (including the bias feature - if bias >= 0). `y' is an array containing the target values. And - `x' is an array of pointers, - each of which points to a sparse representation (array of feature_node) of one - training vector. + if bias >= 0). `y' is an array containing the target values. (integers + in classification, real numbers in regression) And `x' is an array + of pointers, each of which points to a sparse representation (array + of feature_node) of one training vector. For example, if we have the following training data: @@ -280,32 +382,44 @@ Library Usage [ ] -> (2,0.1) (4,1.4) (5,0.5) (6,1) (-1,?) [ ] -> (1,-0.1) (2,-0.2) (3,0.1) (4,1.1) (5,0.1) (6,1) (-1,?) 
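    A minimal C sketch of how these structures might be filled for the
    five-instance example above, assuming the declarations listed in this
    README (linear.h); the parameter values mirror the documented defaults
    and error handling is kept to a minimum:

        #include <stdio.h>
        #include "linear.h"

        int main(void)
        {
            /* sparse rows of the example above, bias = 1 appended as
               feature 6; each row ends with a node whose index = -1 */
            struct feature_node x1[] = {{2,0.1},{3,0.2},{6,1},{-1,0}};
            struct feature_node x2[] = {{2,0.1},{3,0.3},{4,-1.2},{6,1},{-1,0}};
            struct feature_node x3[] = {{1,0.4},{6,1},{-1,0}};
            struct feature_node x4[] = {{2,0.1},{4,1.4},{5,0.5},{6,1},{-1,0}};
            struct feature_node x5[] = {{1,-0.1},{2,-0.2},{3,0.1},{4,1.1},{5,0.1},{6,1},{-1,0}};
            struct feature_node *x[] = {x1, x2, x3, x4, x5};
            double y[] = {1, 2, 1, 2, 3};

            struct problem prob = {5, 6, y, x, 1};      /* l, n, y, x, bias */

            struct parameter param;
            param.solver_type = L2R_L2LOSS_SVC_DUAL;    /* -s 1, the default solver */
            param.C = 1;
            param.eps = 0.1;                            /* documented default for -s 1 */
            param.p = 0.1;
            param.nu = 0.5;
            param.nr_weight = 0;                        /* keep default class penalties */
            param.weight_label = NULL;
            param.weight = NULL;
            param.init_sol = NULL;
            param.regularize_bias = 1;  /* field used by recent versions (-R option); 1 = default */

            const char *err = check_parameter(&prob, &param);  /* always call before train() */
            if (err) { fprintf(stderr, "%s\n", err); return 1; }

            struct model *m = train(&prob, &param);
            printf("prediction for the first instance: %g\n", predict(m, x1));
            free_and_destroy_model(&m);
            return 0;
        }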
- struct parameter describes the parameters of a linear classification model: + struct parameter describes the parameters of a linear classification + or regression model: struct parameter { int solver_type; /* these are for training only */ - double eps; /* stopping criteria */ + double eps; /* stopping tolerance */ double C; + double nu; /* one-class SVM only */ int nr_weight; int *weight_label; double* weight; + double p; + double *init_sol; }; - solver_type can be one of L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL. - + solver_type can be one of L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL, ONECLASS_SVM. + for classification L2R_LR L2-regularized logistic regression (primal) L2R_L2LOSS_SVC_DUAL L2-regularized L2-loss support vector classification (dual) L2R_L2LOSS_SVC L2-regularized L2-loss support vector classification (primal) L2R_L1LOSS_SVC_DUAL L2-regularized L1-loss support vector classification (dual) - MCSVM_CS multi-class support vector classification by Crammer and Singer + MCSVM_CS support vector classification by Crammer and Singer L1R_L2LOSS_SVC L1-regularized L2-loss support vector classification L1R_LR L1-regularized logistic regression L2R_LR_DUAL L2-regularized logistic regression (dual) + for regression + L2R_L2LOSS_SVR L2-regularized L2-loss support vector regression (primal) + L2R_L2LOSS_SVR_DUAL L2-regularized L2-loss support vector regression (dual) + L2R_L1LOSS_SVR_DUAL L2-regularized L1-loss support vector regression (dual) + for outlier detection + ONECLASS_SVM one-class support vector machine (dual) C is the cost of constraints violation. + p is the sensitiveness of loss of support vector regression. + nu in ONECLASS_SVM approximates the fraction of data as outliers. eps is the stopping criterion. nr_weight, weight_label, and weight are used to change the penalty @@ -320,6 +434,10 @@ Library Usage If you do not want to change penalty for any of the classes, just set nr_weight to 0. + init_sol includes the initial weight vectors (supported for only some + solvers). See the explanation of the vector w in the model + structure. + *NOTE* To avoid wrong parameters, check_parameter() should be called before train(). @@ -333,13 +451,21 @@ Library Usage double *w; int *label; /* label of each class */ double bias; + double rho; /* one-class SVM only */ }; param describes the parameters used to obtain the model. - nr_class and nr_feature are the number of classes and features, respectively. + nr_class is the number of classes for classification. It is a + non-negative integer with special cases of 0 (no training data at + all) and 1 (all training data in one class). For regression and + one-class SVM, nr_class = 2. - The nr_feature*nr_class array w gives feature weights. We use one + nr_feature is the number of features. + + The array w gives feature weights. Its size is + nr_feature*nr_class but is nr_feature if nr_class = 2 and the + solver is not MCSVM_CS (see more explanation below). We use one against the rest for multi-class classification, so each feature index corresponds to nr_class weight values. Weights are organized in the following way @@ -349,13 +475,19 @@ Library Usage | for 1st feature | for 2nd feature | +------------------+------------------+------------+ + The array label stores class labels. 
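    As a small illustrative sketch of this layout (assuming the plain
    nr_feature*nr_class arrangement pictured above, and ignoring the
    binary-class and bias special cases explained in the following
    paragraphs, which the library's get_decfun_coef() handles):

        /* weight of feature feat_idx (1-based) for the class at label
           index label_idx, under the layout pictured above */
        double coef(const struct model *model_, int feat_idx, int label_idx)
        {
            return model_->w[(feat_idx - 1) * model_->nr_class + label_idx];
        }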
+ + When nr_class = 1 or 2, classification solvers (MCSVM_CS + excluded) return a single vector of weights by considering + label[0] as positive in training. + If bias >= 0, x becomes [x; bias]. The number of features is increased by one, so w is a (nr_feature+1)*nr_class array. The value of bias is stored in the variable bias. - The array label stores class labels. + rho is the bias term used in one-class SVM only. -- Function: void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target); +- Function: void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target); This function conducts cross validation. Data are separated to nr_fold folds. Under given parameters, sequentially each fold is @@ -365,23 +497,53 @@ Library Usage The format of prob is same as that for train(). -- Function: int predict(const model *model_, const feature_node *x); +- Function: void find_parameters(const struct problem *prob, + const struct parameter *param, int nr_fold, double start_C, + double start_p, double *best_C, double *best_p, double *best_score); - This functions classifies a test vector using the given - model. The predicted label is returned. + This function is similar to cross_validation. However, instead of + conducting cross validation under specified parameters. For -s 0, 2, it + conducts cross validation many times under parameters C = start_C, + 2*start_C, 4*start_C, 8*start_C, ..., and finds the best one with + the highest cross validation accuracy. For -s 11, it conducts cross + validation many times with a two-fold loop. The outer loop considers a + default sequence of p = 19/20*max_p, ..., 1/20*max_p, 0 and + under each p value the inner loop considers a sequence of parameters + C = start_C, 2*start_C, 4*start_C, ..., and finds the best one with the + lowest mean squared error. -- Function: int predict_values(const struct model *model_, + If start_C <= 0, then this procedure calculates a small enough C + for prob as the start_C. The procedure stops when the models of + all folds become stable or C reaches max_C. + + If start_p <= 0, then this procedure calculates a maximal p for prob as + the start_p. Otherwise, the procedure starts with the first + i/20*max_p <= start_p so the outer sequence is i/20*max_p, + (i-1)/20*max_p, ..., 0. + + The best C, the best p, and the corresponding accuracy (or MSE) are + assigned to *best_C, *best_p and *best_score, respectively. For + classification, *best_p is not used, and the returned value is -1. + +- Function: double predict(const model *model_, const feature_node *x); + + For a classification model, the predicted class for x is returned. + For a regression model, the function value of x calculated using + the model is returned. + +- Function: double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values); - This function gives nr_w decision values in the array - dec_values. nr_w is 1 if there are two classes except multi-class - svm by Crammer and Singer (-s 4), and is the number of classes otherwise. + This function gives nr_w decision values in the array dec_values. + nr_w=1 if regression is applied or the number of classes is two. An exception is + multi-class SVM by Crammer and Singer (-s 4), where nr_w = 2 if there are two classes. For all other situations, nr_w is the + number of classes. - We implement one-vs-the rest multi-class strategy (-s 0,1,2,3) and - multi-class svm by Crammer and Singer (-s 4) for multi-class SVM. 
+ We implement one-vs-the rest multi-class strategy (-s 0,1,2,3,5,6,7) + and multi-class SVM by Crammer and Singer (-s 4) for multi-class SVM. The class with the highest decision value is returned. -- Function: int predict_probability(const struct model *model_, +- Function: double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates); This function gives nr_class probability estimates in the array @@ -397,10 +559,36 @@ Library Usage - Function: int get_nr_class(const model *model_); The function gives the number of classes of the model. + For a regression model, 2 is returned. - Function: void get_labels(const model *model_, int* label); This function outputs the name of labels into an array called label. + For a regression model, label is unchanged. + +- Function: double get_decfun_coef(const struct model *model_, int feat_idx, + int label_idx); + + This function gives the coefficient for the feature with feature index = + feat_idx and the class with label index = label_idx. Note that feat_idx + starts from 1, while label_idx starts from 0. If feat_idx is not in the + valid range (1 to nr_feature), then a zero value will be returned. For + classification models, if label_idx is not in the valid range (0 to + nr_class-1), then a zero value will be returned; for regression models + and one-class SVM models, label_idx is ignored. + +- Function: double get_decfun_bias(const struct model *model_, int label_idx); + + This function gives the bias term corresponding to the class with the + label_idx. For classification models, if label_idx is not in a valid range + (0 to nr_class-1), then a zero value will be returned; for regression + models, label_idx is ignored. This function cannot be called for a one-class + SVM model. + +- Function: double get_decfun_rho(const struct model *model_); + + This function gives rho, the bias term used in one-class SVM only. This + function can only be called for a one-class SVM model. - Function: const char *check_parameter(const struct problem *prob, const struct parameter *param); @@ -410,6 +598,21 @@ Library Usage train() and cross_validation(). It returns NULL if the parameters are feasible, otherwise an error message is returned. +- Function: int check_probability_model(const struct model *model); + + This function returns 1 if the model supports probability output; + otherwise, it returns 0. + +- Function: int check_regression_model(const struct model *model); + + This function returns 1 if the model is a regression model; otherwise + it returns 0. + +- Function: int check_oneclass_model(const struct model *model); + + This function returns 1 if the model is a one-class SVM model; otherwise + it returns 0. + - Function: int save_model(const char *model_file_name, const struct model *model_); @@ -437,19 +640,19 @@ Library Usage - Function: void set_print_string_function(void (*print_func)(const char *)); Users can specify their output format by a function. Use - set_print_string_function(NULL); + set_print_string_function(NULL); for default printing to stdout. Building Windows Binaries ========================= -Windows binaries are in the directory `windows'. To build them via -Visual C++, use the following steps: +Windows binaries are available in the directory `windows'. To re-build +them via Visual C++, use the following steps: 1. Open a dos command box and change to liblinear directory. 
If environment variables of VC++ have not been set, type -"C:\Program Files\Microsoft Visual Studio 10.0\VC\bin\vcvars32.bat" +"C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat" You may have to modify the above command according which version of VC++ or where it is installed. @@ -458,13 +661,20 @@ VC++ or where it is installed. nmake -f Makefile.win clean all +3. (optional) To build shared library liblinear.dll, type + +nmake -f Makefile.win lib + +4. (Optional) To build 32-bit windows binaries, you must + (1) Setup "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars32.bat" instead of vcvars64.bat + (2) Change CFLAGS in Makefile.win: /D _WIN64 to /D _WIN32 MATLAB/OCTAVE Interface ======================= Please check the file README in the directory `matlab'. -PYTHON Interface +Python Interface ================ Please check the file README in the directory `python'. diff --git a/liblinear/blas/Makefile b/liblinear/blas/Makefile index 255d574d5..e0293a41c 100644 --- a/liblinear/blas/Makefile +++ b/liblinear/blas/Makefile @@ -1,14 +1,14 @@ -AR = ar -RANLIB = ranlib +AR ?= ar +RANLIB ?= ranlib -HEADERS = blas.h blas.h blasp.h -FILES = dnrm2.o daxpy.o ddot.o dscal.o +HEADERS = blas.h blasp.h +FILES = dnrm2.o daxpy.o ddot.o dscal.o -CFLAGS = $(OPTFLAGS) +CFLAGS = $(OPTFLAGS) FFLAGS = $(OPTFLAGS) blas: $(FILES) $(HEADERS) - $(AR) rcv blas.a $(FILES) + $(AR) rcv blas.a $(FILES) $(RANLIB) blas.a clean: diff --git a/liblinear/blas/blasp.h b/liblinear/blas/blasp.h index 745836db8..d0fe8eca0 100644 --- a/liblinear/blas/blasp.h +++ b/liblinear/blas/blasp.h @@ -3,6 +3,10 @@ /* Functions listed in alphabetical order */ +#ifdef __cplusplus +extern "C" { +#endif + #ifdef F2C_COMPAT void cdotc_(fcomplex *dotval, int *n, fcomplex *cx, int *incx, @@ -166,7 +170,7 @@ int dgemm_(char *transa, char *transb, int *m, int *n, int *k, double *beta, double *c, int *ldc); int dgemv_(char *trans, int *m, int *n, double *alpha, double *a, - int *lda, double *x, int *incx, double *beta, double *y, + int *lda, double *x, int *incx, double *beta, double *y, int *incy); int dger_(int *m, int *n, double *alpha, double *x, int *incx, @@ -178,7 +182,7 @@ int drot_(int *n, double *sx, int *incx, double *sy, int *incy, int drotg_(double *sa, double *sb, double *c, double *s); int dsbmv_(char *uplo, int *n, int *k, double *alpha, double *a, - int *lda, double *x, int *incx, double *beta, double *y, + int *lda, double *x, int *incx, double *beta, double *y, int *incy); int dscal_(int *n, double *sa, double *sx, int *incx); @@ -227,14 +231,14 @@ int dtpsv_(char *uplo, char *trans, char *diag, int *n, double *ap, double *x, int *incx); int dtrmm_(char *side, char *uplo, char *transa, char *diag, int *m, - int *n, double *alpha, double *a, int *lda, double *b, + int *n, double *alpha, double *a, int *lda, double *b, int *ldb); int dtrmv_(char *uplo, char *trans, char *diag, int *n, double *a, int *lda, double *x, int *incx); int dtrsm_(char *side, char *uplo, char *transa, char *diag, int *m, - int *n, double *alpha, double *a, int *lda, double *b, + int *n, double *alpha, double *a, int *lda, double *b, int *ldb); int dtrsv_(char *uplo, char *trans, char *diag, int *n, double *a, @@ -254,7 +258,7 @@ int sgemm_(char *transa, char *transb, int *m, int *n, int *k, float *beta, float *c, int *ldc); int sgemv_(char *trans, int *m, int *n, float *alpha, float *a, - int *lda, float *x, int *incx, float *beta, float *y, + int *lda, float *x, int *incx, float 
*beta, float *y, int *incy); int sger_(int *m, int *n, float *alpha, float *x, int *incx, @@ -266,7 +270,7 @@ int srot_(int *n, float *sx, int *incx, float *sy, int *incy, int srotg_(float *sa, float *sb, float *c, float *s); int ssbmv_(char *uplo, int *n, int *k, float *alpha, float *a, - int *lda, float *x, int *incx, float *beta, float *y, + int *lda, float *x, int *incx, float *beta, float *y, int *incy); int sscal_(int *n, float *sa, float *sx, int *incx); @@ -315,14 +319,14 @@ int stpsv_(char *uplo, char *trans, char *diag, int *n, float *ap, float *x, int *incx); int strmm_(char *side, char *uplo, char *transa, char *diag, int *m, - int *n, float *alpha, float *a, int *lda, float *b, + int *n, float *alpha, float *a, int *lda, float *b, int *ldb); int strmv_(char *uplo, char *trans, char *diag, int *n, float *a, int *lda, float *x, int *incx); int strsm_(char *side, char *uplo, char *transa, char *diag, int *m, - int *n, float *alpha, float *a, int *lda, float *b, + int *n, float *alpha, float *a, int *lda, float *b, int *ldb); int strsv_(char *uplo, char *trans, char *diag, int *n, float *a, @@ -428,3 +432,7 @@ int ztrsm_(char *side, char *uplo, char *transa, char *diag, int *m, int ztrsv_(char *uplo, char *trans, char *diag, int *n, dcomplex *a, int *lda, dcomplex *x, int *incx); + +#ifdef __cplusplus +} +#endif diff --git a/liblinear/blas/daxpy.c b/liblinear/blas/daxpy.c index 58f345a53..0a3c5942b 100644 --- a/liblinear/blas/daxpy.c +++ b/liblinear/blas/daxpy.c @@ -1,14 +1,18 @@ #include "blas.h" +#ifdef __cplusplus +extern "C" { +#endif + int daxpy_(int *n, double *sa, double *sx, int *incx, double *sy, int *incy) { long int i, m, ix, iy, nn, iincx, iincy; register double ssa; - /* constant times a vector plus a vector. - uses unrolled loop for increments equal to one. - jack dongarra, linpack, 3/11/78. + /* constant times a vector plus a vector. + uses unrolled loop for increments equal to one. + jack dongarra, linpack, 3/11/78. modified 12/3/93, array(1) declarations changed to array(*) */ /* Dereference inputs */ @@ -47,3 +51,7 @@ int daxpy_(int *n, double *sa, double *sx, int *incx, double *sy, return 0; } /* daxpy_ */ + +#ifdef __cplusplus +} +#endif diff --git a/liblinear/blas/ddot.c b/liblinear/blas/ddot.c index a64a2808f..294f4ed84 100644 --- a/liblinear/blas/ddot.c +++ b/liblinear/blas/ddot.c @@ -1,14 +1,18 @@ #include "blas.h" +#ifdef __cplusplus +extern "C" { +#endif + double ddot_(int *n, double *sx, int *incx, double *sy, int *incy) { long int i, m, nn, iincx, iincy; double stemp; long int ix, iy; - /* forms the dot product of two vectors. - uses unrolled loops for increments equal to one. - jack dongarra, linpack, 3/11/78. + /* forms the dot product of two vectors. + uses unrolled loops for increments equal to one. + jack dongarra, linpack, 3/11/78. 
modified 12/3/93, array(1) declarations changed to array(*) */ /* Dereference inputs */ @@ -48,3 +52,7 @@ double ddot_(int *n, double *sx, int *incx, double *sy, int *incy) return stemp; } /* ddot_ */ + +#ifdef __cplusplus +} +#endif diff --git a/liblinear/blas/dnrm2.c b/liblinear/blas/dnrm2.c index e50cdf777..d80b1a5be 100644 --- a/liblinear/blas/dnrm2.c +++ b/liblinear/blas/dnrm2.c @@ -1,18 +1,22 @@ #include /* Needed for fabs() and sqrt() */ #include "blas.h" +#ifdef __cplusplus +extern "C" { +#endif + double dnrm2_(int *n, double *x, int *incx) { long int ix, nn, iincx; double norm, scale, absxi, ssq, temp; -/* DNRM2 returns the euclidean norm of a vector via the function - name, so that +/* DNRM2 returns the euclidean norm of a vector via the function + name, so that - DNRM2 := sqrt( x'*x ) + DNRM2 := sqrt( x'*x ) - -- This version written on 25-October-1982. - Modified on 14-October-1993 to inline the call to SLASSQ. + -- This version written on 25-October-1982. + Modified on 14-October-1993 to inline the call to SLASSQ. Sven Hammarling, Nag Ltd. */ /* Dereference inputs */ @@ -24,13 +28,13 @@ double dnrm2_(int *n, double *x, int *incx) if (nn == 1) { norm = fabs(x[0]); - } + } else { scale = 0.0; ssq = 1.0; - /* The following loop is equivalent to this call to the LAPACK + /* The following loop is equivalent to this call to the LAPACK auxiliary routine: CALL SLASSQ( N, X, INCX, SCALE, SSQ ) */ for (ix=(nn-1)*iincx; ix>=0; ix-=iincx) @@ -60,3 +64,7 @@ double dnrm2_(int *n, double *x, int *incx) return norm; } /* dnrm2_ */ + +#ifdef __cplusplus +} +#endif diff --git a/liblinear/blas/dscal.c b/liblinear/blas/dscal.c index a0eca0c39..5edbe8b25 100644 --- a/liblinear/blas/dscal.c +++ b/liblinear/blas/dscal.c @@ -1,14 +1,18 @@ #include "blas.h" +#ifdef __cplusplus +extern "C" { +#endif + int dscal_(int *n, double *sa, double *sx, int *incx) { long int i, m, nincx, nn, iincx; double ssa; - /* scales a vector by a constant. - uses unrolled loops for increment equal to 1. - jack dongarra, linpack, 3/11/78. - modified 3/93 to return if incx .le. 0. + /* scales a vector by a constant. + uses unrolled loops for increment equal to 1. + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. 
modified 12/3/93, array(1) declarations changed to array(*) */ /* Dereference inputs */ @@ -42,3 +46,7 @@ int dscal_(int *n, double *sa, double *sx, int *incx) return 0; } /* dscal_ */ + +#ifdef __cplusplus +} +#endif diff --git a/liblinear/liblinear.vcxproj b/liblinear/liblinear.vcxproj index 3158c031f..2a2beef08 100755 --- a/liblinear/liblinear.vcxproj +++ b/liblinear/liblinear.vcxproj @@ -16,13 +16,13 @@ - + - + {A7BE3D76-F20C-40C5-8986-DE4028B3B57D} @@ -96,4 +96,4 @@ - \ No newline at end of file + diff --git a/liblinear/linear.cpp b/liblinear/linear.cpp index 152370373..70b79270a 100644 --- a/liblinear/linear.cpp +++ b/liblinear/linear.cpp @@ -3,8 +3,10 @@ #include #include #include +#include #include "linear.h" -#include "tron.h" +#include "newton.h" +int liblinear_version = LIBLINEAR_VERSION; typedef signed char schar; template static inline void swap(T& x, T& y) { T t=x; x=y; y=t; } #ifndef min @@ -14,18 +16,19 @@ template static inline T min(T x,T y) { return (x static inline T max(T x,T y) { return (x>y)?x:y; } #endif template static inline void clone(T*& dst, S* src, int n) -{ +{ dst = new T[n]; memcpy((void *)dst,(void *)src,sizeof(T)*n); } -#define Malloc(type,n) (type *)malloc((n)*sizeof(type)) #define INF HUGE_VAL +#define Malloc(type,n) (type *)malloc((n)*sizeof(type)) static void print_string_stdout(const char *s) { fputs(s,stdout); fflush(stdout); } +static void print_null(const char *s) {} static void (*liblinear_print_string) (const char *) = &print_string_stdout; @@ -42,143 +45,205 @@ static void info(const char *fmt,...) #else static void info(const char *fmt,...) {} #endif - -class l2r_lr_fun : public function +class sparse_operator { public: - l2r_lr_fun(const problem *prob, double Cp, double Cn); - ~l2r_lr_fun(); + static double nrm2_sq(const feature_node *x) + { + double ret = 0; + while(x->index != -1) + { + ret += x->value*x->value; + x++; + } + return ret; + } + + static double dot(const double *s, const feature_node *x) + { + double ret = 0; + while(x->index != -1) + { + ret += s[x->index-1]*x->value; + x++; + } + return ret; + } + + static double sparse_dot(const feature_node *x1, const feature_node *x2) + { + double ret = 0; + while(x1->index != -1 && x2->index != -1) + { + if(x1->index == x2->index) + { + ret += x1->value * x2->value; + ++x1; + ++x2; + } + else + { + if(x1->index > x2->index) + ++x2; + else + ++x1; + } + } + return ret; + } + + static void axpy(const double a, const feature_node *x, double *y) + { + while(x->index != -1) + { + y[x->index-1] += a*x->value; + x++; + } + } +}; + +// L2-regularized empirical risk minimization +// min_w w^Tw/2 + \sum C_i \xi(w^Tx_i), where \xi() is the loss + +class l2r_erm_fun: public function +{ +public: + l2r_erm_fun(const problem *prob, const parameter *param, double *C); + ~l2r_erm_fun(); double fun(double *w); - void grad(double *w, double *g); - void Hv(double *s, double *Hs); - + double linesearch_and_update(double *w, double *d, double *f, double *g, double alpha); int get_nr_variable(void); -private: +protected: + virtual double C_times_loss(int i, double wx_i) = 0; void Xv(double *v, double *Xv); void XTv(double *v, double *XTv); double *C; - double *z; - double *D; const problem *prob; + double *wx; + double *tmp; // a working array + double wTw; + int regularize_bias; }; -l2r_lr_fun::l2r_lr_fun(const problem *prob, double Cp, double Cn) +l2r_erm_fun::l2r_erm_fun(const problem *prob, const parameter *param, double *C) { - int i; int l=prob->l; - int *y=prob->y; this->prob = prob; - z = new double[l]; 
- D = new double[l]; - C = new double[l]; - - for (i=0; iC = C; + this->regularize_bias = param->regularize_bias; } -l2r_lr_fun::~l2r_lr_fun() +l2r_erm_fun::~l2r_erm_fun() { - delete[] z; - delete[] D; - delete[] C; + delete[] wx; + delete[] tmp; } - -double l2r_lr_fun::fun(double *w) +double l2r_erm_fun::fun(double *w) { int i; double f=0; - int *y=prob->y; int l=prob->l; int w_size=get_nr_variable(); - Xv(w, z); - for(i=0;i= 0) - f += C[i]*log(1 + exp(-yz)); - else - f += C[i]*(-yz+log(1 + exp(yz))); - } - f = 2*f; - for(i=0;iy; - int l=prob->l; - int w_size=get_nr_variable(); - - for(i=0;in; } -void l2r_lr_fun::Hv(double *s, double *Hs) +// On entry *f must be the function value of w +// On exit w is updated and *f is the new function value +double l2r_erm_fun::linesearch_and_update(double *w, double *s, double *f, double *g, double alpha) { int i; - int l=prob->l; - int w_size=get_nr_variable(); - double *wa = new double[l]; + int l = prob->l; + double sTs = 0; + double wTs = 0; + double gTs = 0; + double eta = 0.01; + int w_size = get_nr_variable(); + int max_num_linesearch = 20; + double fold = *f; + Xv(s, tmp); - Xv(s, wa); - for(i=0;i= max_num_linesearch) + { + *f = fold; + return 0; + } + else + for (i=0;il; feature_node **x=prob->x; for(i=0;iindex!=-1) - { - Xv[i]+=v[s->index-1]*s->value; - s++; - } - } + Xv[i]=sparse_operator::dot(v, x[i]); } -void l2r_lr_fun::XTv(double *v, double *XTv) +void l2r_erm_fun::XTv(double *v, double *XTv) { int i; int l=prob->l; @@ -187,172 +252,226 @@ void l2r_lr_fun::XTv(double *v, double *XTv) for(i=0;il; + D = new double[l]; +} + +l2r_lr_fun::~l2r_lr_fun() +{ + delete[] D; +} + +double l2r_lr_fun::C_times_loss(int i, double wx_i) +{ + double ywx_i = wx_i * prob->y[i]; + if (ywx_i >= 0) + return C[i]*log(1 + exp(-ywx_i)); + else + return C[i]*(-ywx_i + log(1 + exp(ywx_i))); +} + +void l2r_lr_fun::grad(double *w, double *g) +{ + int i; + double *y=prob->y; + int l=prob->l; + int w_size=get_nr_variable(); + for(i=0;iindex!=-1) + tmp[i] = 1/(1 + exp(-y[i]*wx[i])); + D[i] = tmp[i]*(1-tmp[i]); + tmp[i] = C[i]*(tmp[i]-1)*y[i]; + } + XTv(tmp, g); + + for(i=0;il; + int w_size=get_nr_variable(); + feature_node **x = prob->x; + + for (i=0; iindex!=-1) { - XTv[s->index-1]+=v[i]*s->value; - s++; + M[xi->index-1] += xi->value*xi->value*C[i]*D[i]; + xi++; } } } -class l2r_l2_svc_fun : public function -{ -public: - l2r_l2_svc_fun(const problem *prob, double Cp, double Cn); - ~l2r_l2_svc_fun(); - - double fun(double *w); - void grad(double *w, double *g); - void Hv(double *s, double *Hs); - - int get_nr_variable(void); - -private: - void Xv(double *v, double *Xv); - void subXv(double *v, double *Xv); - void subXTv(double *v, double *XTv); - - double *C; - double *z; - double *D; - int *I; - int sizeI; - const problem *prob; -}; - -l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *prob, double Cp, double Cn) +void l2r_lr_fun::Hv(double *s, double *Hs) { int i; int l=prob->l; - int *y=prob->y; + int w_size=get_nr_variable(); + feature_node **x=prob->x; - this->prob = prob; - - z = new double[l]; - D = new double[l]; - C = new double[l]; - I = new int[l]; - - for (i=0; il]; } l2r_l2_svc_fun::~l2r_l2_svc_fun() { - delete[] z; - delete[] D; - delete[] C; delete[] I; } -double l2r_l2_svc_fun::fun(double *w) +double l2r_l2_svc_fun::C_times_loss(int i, double wx_i) { - int i; - double f=0; - int *y=prob->y; - int l=prob->l; - int w_size=get_nr_variable(); - - Xv(w, z); - for(i=0;i 0) - f += C[i]*d*d; - } - f = 2*f; - for(i=0;iy[i] * wx_i; + if (d > 0) + return C[i] * d * d; 
+ else + return 0; } void l2r_l2_svc_fun::grad(double *w, double *g) { int i; - int *y=prob->y; + double *y=prob->y; int l=prob->l; int w_size=get_nr_variable(); sizeI = 0; for (i=0;in; + int i; + int w_size=get_nr_variable(); + feature_node **x = prob->x; + + for (i=0; iindex!=-1) + { + M[xi->index-1] += xi->value*xi->value*C[idx]*2; + xi++; + } + } } void l2r_l2_svc_fun::Hv(double *s, double *Hs) { int i; - int l=prob->l; int w_size=get_nr_variable(); - double *wa = new double[l]; + feature_node **x=prob->x; - subXv(s, wa); + for(i=0;il; - feature_node **x=prob->x; - - for(i=0;iindex!=-1) - { - Xv[i]+=v[s->index-1]*s->value; - s++; - } - } -} - -void l2r_l2_svc_fun::subXv(double *v, double *Xv) -{ - int i; - feature_node **x=prob->x; - - for(i=0;iindex!=-1) - { - Xv[i]+=v[s->index-1]*s->value; - s++; - } - } + if(regularize_bias == 0) + Hs[w_size-1] -= s[w_size-1]; } void l2r_l2_svc_fun::subXTv(double *v, double *XTv) @@ -364,29 +483,87 @@ void l2r_l2_svc_fun::subXTv(double *v, double *XTv) for(i=0;iindex!=-1) - { - XTv[s->index-1]+=v[i]*s->value; - s++; - } - } + sparse_operator::axpy(v[i], x[I[i]], XTv); } -// A coordinate descent algorithm for +class l2r_l2_svr_fun: public l2r_l2_svc_fun +{ +public: + l2r_l2_svr_fun(const problem *prob, const parameter *param, double *C); + + void grad(double *w, double *g); + +private: + double C_times_loss(int i, double wx_i); + double p; +}; + +l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, const parameter *param, double *C): + l2r_l2_svc_fun(prob, param, C) +{ + this->p = param->p; + this->regularize_bias = param->regularize_bias; +} + +double l2r_l2_svr_fun::C_times_loss(int i, double wx_i) +{ + double d = wx_i - prob->y[i]; + if(d < -p) + return C[i]*(d+p)*(d+p); + else if(d > p) + return C[i]*(d-p)*(d-p); + return 0; +} + +void l2r_l2_svr_fun::grad(double *w, double *g) +{ + int i; + double *y=prob->y; + int l=prob->l; + int w_size=get_nr_variable(); + double d; + + sizeI = 0; + for(i=0;i p) + { + tmp[sizeI] = C[i]*(d-p); + I[sizeI] = i; + sizeI++; + } + + } + subXTv(tmp, g); + + for(i=0;iy[i]) +#define GETI(i) ((int) prob->y[i]) // To support weights for instances, use GETI(i) (i) class Solver_MCSVM_CS @@ -455,8 +632,8 @@ void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int act double beta = D[0] - A_i*C_yi; for(r=1;ry[i] == m + // alpha[i*nr_class+m] <= 0 if prob->y[i] != m + // If initial alpha isn't zero, uncomment the for loop below to initialize w for(i=0;iindex != -1) { - QD[i] += (xi->value)*(xi->value); + double val = xi->value; + QD[i] += val*val; + + // Uncomment the for loop if initial alpha isn't zero + // for(m=0; mindex-1)*nr_class+m] += alpha[i*nr_class+m]*val; xi++; } active_size_i[i] = nr_class; - y_index[i] = prob->y[i]; + y_index[i] = (int)prob->y[i]; index[i] = i; } - while(iter < max_iter) + while(iter < max_iter) { double stopping = -INF; for(i=0;iy[i]] < C[GETI(i)] && G[y_index[i]] < minG) + if(alpha_i[(int) prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG) minG = G[y_index[i]]; for(m=0;mm) { - if(!be_shrunk(i, active_size_i[i], y_index[i], + if(!be_shrunk(i, active_size_i[i], y_index[i], alpha_i[alpha_index_i[active_size_i[i]]], minG)) { swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]); swap(G[m], G[active_size_i[i]]); if(y_index[i] == active_size_i[i]) y_index[i] = m; - else if(y_index[i] == m) + else if(y_index[i] == m) y_index[i] = active_size_i[i]; break; } @@ -585,7 +773,7 @@ void Solver_MCSVM_CS::Solve(double *w) { active_size--; swap(index[s], index[active_size]); - s--; + s--; 
continue; } @@ -663,7 +851,7 @@ void Solver_MCSVM_CS::Solve(double *w) nSV++; } for(i=0;iy[i]]; + v -= alpha[i*nr_class+(int)prob->y[i]]; info("Objective value = %lf\n",v); info("nSV = %d\n",nSV); @@ -678,46 +866,47 @@ void Solver_MCSVM_CS::Solve(double *w) delete [] active_size_i; } -// A coordinate descent algorithm for +// A coordinate descent algorithm for // L1-loss and L2-loss SVM dual problems // // min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha, -// s.t. 0 <= alpha_i <= upper_bound_i, -// +// s.t. 0 <= \alpha_i <= upper_bound_i, +// // where Qij = yi yj xi^T xj and -// D is a diagonal matrix +// D is a diagonal matrix // // In L1-SVM case: -// upper_bound_i = Cp if y_i = 1 -// upper_bound_i = Cn if y_i = -1 -// D_ii = 0 +// upper_bound_i = Cp if y_i = 1 +// upper_bound_i = Cn if y_i = -1 +// D_ii = 0 // In L2-SVM case: -// upper_bound_i = INF -// D_ii = 1/(2*Cp) if y_i = 1 -// D_ii = 1/(2*Cn) if y_i = -1 +// upper_bound_i = INF +// D_ii = 1/(2*Cp) if y_i = 1 +// D_ii = 1/(2*Cn) if y_i = -1 // -// Given: +// Given: // x, y, Cp, Cn // eps is the stopping tolerance // // solution will be put in w -// +// +// this function returns the number of iterations +// // See Algorithm 3 of Hsieh et al., ICML 2008 #undef GETI #define GETI(i) (y[i]+1) // To support weights for instances, use GETI(i) (i) -static void solve_l2r_l1l2_svc( - const problem *prob, double *w, double eps, - double Cp, double Cn, int solver_type) +static int solve_l2r_l1l2_svc(const problem *prob, const parameter *param, double *w, double Cp, double Cn, int max_iter=300) { int l = prob->l; int w_size = prob->n; + double eps = param->eps; + int solver_type = param->solver_type; int i, s, iter = 0; double C, d, G; double *QD = new double[l]; - int max_iter = 1000; int *index = new int[l]; double *alpha = new double[l]; schar *y = new schar[l]; @@ -740,27 +929,33 @@ static void solve_l2r_l1l2_svc( upper_bound[2] = Cp; } - for(i=0; iy[i] > 0) { - y[i] = +1; + y[i] = +1; } else { y[i] = -1; } + } + + // Initial alpha can be set here. 
Note that + // 0 <= alpha[i] <= upper_bound[GETI(i)] + for(i=0; ix[i]; - while (xi->index != -1) - { - QD[i] += (xi->value)*(xi->value); - xi++; - } + feature_node * const xi = prob->x[i]; + QD[i] += sparse_operator::nrm2_sq(xi); + sparse_operator::axpy(y[i]*alpha[i], xi, w); + index[i] = i; } @@ -778,16 +973,10 @@ static void solve_l2r_l1l2_svc( for (s=0; sx[i]; - feature_node *xi = prob->x[i]; - while(xi->index!= -1) - { - G += w[xi->index-1]*(xi->value); - xi++; - } - G = G*yi-1; + G = yi*sparse_operator::dot(w, xi)-1; C = upper_bound[GETI(i)]; G += alpha[i]*diag[GETI(i)]; @@ -828,12 +1017,7 @@ static void solve_l2r_l1l2_svc( double alpha_old = alpha[i]; alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C); d = (alpha[i] - alpha_old)*yi; - xi = prob->x[i]; - while (xi->index != -1) - { - w[xi->index-1] += d*xi->value; - xi++; - } + sparse_operator::axpy(d, xi, w); } } @@ -841,7 +1025,8 @@ static void solve_l2r_l1l2_svc( if(iter % 10 == 0) info("."); - if(PGmax_new - PGmin_new <= eps) + if(PGmax_new - PGmin_new <= eps && + fabs(PGmax_new) <= eps && fabs(PGmin_new) <= eps) { if(active_size == l) break; @@ -863,8 +1048,6 @@ static void solve_l2r_l1l2_svc( } info("\noptimization finished, #iter = %d\n",iter); - if (iter >= max_iter) - info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n"); // calculate objective value @@ -885,68 +1068,292 @@ static void solve_l2r_l1l2_svc( delete [] alpha; delete [] y; delete [] index; + + return iter; } -// A coordinate descent algorithm for + +// A coordinate descent algorithm for +// L1-loss and L2-loss epsilon-SVR dual problem +// +// min_\beta 0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l|\beta_i| + \sum_{i=1}^l yi\beta_i, +// s.t. -upper_bound_i <= \beta_i <= upper_bound_i, +// +// where Qij = xi^T xj and +// D is a diagonal matrix +// +// In L1-SVM case: +// upper_bound_i = C +// lambda_i = 0 +// In L2-SVM case: +// upper_bound_i = INF +// lambda_i = 1/(2*C) +// +// Given: +// x, y, p, C +// eps is the stopping tolerance +// +// solution will be put in w +// +// this function returns the number of iterations +// +// See Algorithm 4 of Ho and Lin, 2012 + +#undef GETI +#define GETI(i) (0) +// To support weights for instances, use GETI(i) (i) + +static int solve_l2r_l1l2_svr(const problem *prob, const parameter *param, double *w, int max_iter=300) +{ + const int solver_type = param->solver_type; + int l = prob->l; + double C = param->C; + double p = param->p; + int w_size = prob->n; + double eps = param->eps; + int i, s, iter = 0; + int active_size = l; + int *index = new int[l]; + + double d, G, H; + double Gmax_old = INF; + double Gmax_new, Gnorm1_new; + double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration + double *beta = new double[l]; + double *QD = new double[l]; + double *y = prob->y; + + // L2R_L2LOSS_SVR_DUAL + double lambda[1], upper_bound[1]; + lambda[0] = 0.5/C; + upper_bound[0] = INF; + + if(solver_type == L2R_L1LOSS_SVR_DUAL) + { + lambda[0] = 0; + upper_bound[0] = C; + } + + // Initial beta can be set here. 
Note that + // -upper_bound <= beta[i] <= upper_bound + for(i=0; ix[i]; + QD[i] = sparse_operator::nrm2_sq(xi); + sparse_operator::axpy(beta[i], xi, w); + + index[i] = i; + } + + + while(iter < max_iter) + { + Gmax_new = 0; + Gnorm1_new = 0; + + for(i=0; ix[i]; + G += sparse_operator::dot(w, xi); + + double Gp = G+p; + double Gn = G-p; + double violation = 0; + if(beta[i] == 0) + { + if(Gp < 0) + violation = -Gp; + else if(Gn > 0) + violation = Gn; + else if(Gp>Gmax_old && Gn<-Gmax_old) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } + } + else if(beta[i] >= upper_bound[GETI(i)]) + { + if(Gp > 0) + violation = Gp; + else if(Gp < -Gmax_old) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } + } + else if(beta[i] <= -upper_bound[GETI(i)]) + { + if(Gn < 0) + violation = -Gn; + else if(Gn > Gmax_old) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } + } + else if(beta[i] > 0) + violation = fabs(Gp); + else + violation = fabs(Gn); + + Gmax_new = max(Gmax_new, violation); + Gnorm1_new += violation; + + // obtain Newton direction d + if(Gp < H*beta[i]) + d = -Gp/H; + else if(Gn > H*beta[i]) + d = -Gn/H; + else + d = -beta[i]; + + if(fabs(d) < 1.0e-12) + continue; + + double beta_old = beta[i]; + beta[i] = min(max(beta[i]+d, -upper_bound[GETI(i)]), upper_bound[GETI(i)]); + d = beta[i]-beta_old; + + if(d != 0) + sparse_operator::axpy(d, xi, w); + } + + if(iter == 0) + Gnorm1_init = Gnorm1_new; + iter++; + if(iter % 10 == 0) + info("."); + + if(Gnorm1_new <= eps*Gnorm1_init) + { + if(active_size == l) + break; + else + { + active_size = l; + info("*"); + Gmax_old = INF; + continue; + } + } + + Gmax_old = Gmax_new; + } + + info("\noptimization finished, #iter = %d\n", iter); + + // calculate objective value + double v = 0; + int nSV = 0; + for(i=0; il; int w_size = prob->n; + double eps = param->eps; int i, s, iter = 0; double *xTx = new double[l]; - int max_iter = 1000; - int *index = new int[l]; + int *index = new int[l]; double *alpha = new double[2*l]; // store alpha and C - alpha - schar *y = new schar[l]; + schar *y = new schar[l]; int max_inner_iter = 100; // for inner Newton - double innereps = 1e-2; + double innereps = 1e-2; double innereps_min = min(1e-8, eps); double upper_bound[3] = {Cn, 0, Cp}; - for(i=0; iy[i] > 0) { - y[i] = +1; + y[i] = +1; } else { y[i] = -1; } + } + + // Initial alpha can be set here. 
Note that + // 0 < alpha[i] < upper_bound[GETI(i)] + // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)] + for(i=0; ix[i]; - while (xi->index != -1) - { - xTx[i] += (xi->value)*(xi->value); - w[xi->index-1] += y[i]*alpha[2*i]*xi->value; - xi++; - } + for(i=0; ix[i]; + xTx[i] = sparse_operator::nrm2_sq(xi); + sparse_operator::axpy(y[i]*alpha[2*i], xi, w); index[i] = i; } @@ -962,21 +1369,16 @@ void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, do for (s=0; sx[i]; - while (xi->index != -1) - { - ywTx += w[xi->index-1]*xi->value; - xi++; - } - ywTx *= y[i]; + feature_node * const xi = prob->x[i]; + ywTx = yi*sparse_operator::dot(w, xi); double a = xisq, b = ywTx; // Decide to minimize g_1(z) or g_2(z) int ind1 = 2*i, ind2 = 2*i+1, sign = 1; - if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0) + if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0) { ind1 = 2*i+1; ind2 = 2*i; @@ -986,7 +1388,7 @@ void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, do // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old) double alpha_old = alpha[ind1]; double z = alpha_old; - if(C - z < 0.5 * C) + if(C - z < 0.5 * C) z = 0.1*z; double gp = a*(z-alpha_old)+sign*b+log(z/(C-z)); Gmax = max(Gmax, fabs(gp)); @@ -994,13 +1396,13 @@ void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, do // Newton method on the sub-problem const double eta = 0.1; // xi in the paper int inner_iter = 0; - while (inner_iter <= max_inner_iter) + while (inner_iter <= max_inner_iter) { if(fabs(gp) < innereps) break; double gpp = a + C/(C-z)/z; double tmpz = z - gp/gpp; - if(tmpz <= 0) + if(tmpz <= 0) z *= eta; else // tmpz in (0, C) z = tmpz; @@ -1013,12 +1415,7 @@ void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, do { alpha[ind1] = z; alpha[ind2] = C-z; - xi = prob->x[i]; - while (xi->index != -1) - { - w[xi->index-1] += sign*(z-alpha_old)*yi*xi->value; - xi++; - } + sparse_operator::axpy(sign*(z-alpha_old)*yi, xi, w); } } @@ -1026,26 +1423,24 @@ void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, do if(iter % 10 == 0) info("."); - if(Gmax < eps) + if(Gmax < eps) break; - if(newton_iter <= l/10) + if(newton_iter <= l/10) innereps = max(innereps_min, 0.1*innereps); } info("\noptimization finished, #iter = %d\n",iter); - if (iter >= max_iter) - info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n"); // calculate objective value - + double v = 0; for(i=0; il; int w_size = prob_col->n; + int regularize_bias = param->regularize_bias; int j, s, iter = 0; int max_iter = 1000; int active_size = w_size; @@ -1087,9 +1488,9 @@ static void solve_l1r_l2_svc( double d, G_loss, G, H; double Gmax_old = INF; double Gmax_new, Gnorm1_new; - double Gnorm1_init; + double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration double d_old, d_diff; - double loss_old, loss_new; + double loss_old = 0, loss_new; double appxcond, cond; int *index = new int[w_size]; @@ -1100,6 +1501,10 @@ static void solve_l1r_l2_svc( double C[3] = {Cn,0,Cp}; + // Initial w can be set here. 
+ for(j=0; jx[j]; while(x->index != -1) { int ind = x->index-1; - double val = x->value; x->value *= y[ind]; // x->value stores yi*xij + double val = x->value; + b[ind] -= w[j]*val; xj_sq[j] += C[GETI(ind)]*val*val; x++; } @@ -1160,59 +1565,72 @@ static void solve_l1r_l2_svc( H *= 2; H = max(H, 1e-12); - double Gp = G+1; - double Gn = G-1; double violation = 0; - if(w[j] == 0) - { - if(Gp < 0) - violation = -Gp; - else if(Gn > 0) - violation = Gn; - else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) - { - active_size--; - swap(index[s], index[active_size]); - s--; - continue; - } - } - else if(w[j] > 0) - violation = fabs(Gp); + double Gp = 0, Gn = 0; + if(j == w_size-1 && regularize_bias == 0) + violation = fabs(G); else - violation = fabs(Gn); - + { + Gp = G+1; + Gn = G-1; + if(w[j] == 0) + { + if(Gp < 0) + violation = -Gp; + else if(Gn > 0) + violation = Gn; + else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } + } + else if(w[j] > 0) + violation = fabs(Gp); + else + violation = fabs(Gn); + } Gmax_new = max(Gmax_new, violation); Gnorm1_new += violation; // obtain Newton direction d - if(Gp <= H*w[j]) - d = -Gp/H; - else if(Gn >= H*w[j]) - d = -Gn/H; + if(j == w_size-1 && regularize_bias == 0) + d = -G/H; else - d = -w[j]; + { + if(Gp < H*w[j]) + d = -Gp/H; + else if(Gn > H*w[j]) + d = -Gn/H; + else + d = -w[j]; + } if(fabs(d) < 1.0e-12) continue; - double delta = fabs(w[j]+d)-fabs(w[j]) + G*d; + double delta; + if(j == w_size-1 && regularize_bias == 0) + delta = G*d; + else + delta = fabs(w[j]+d)-fabs(w[j]) + G*d; d_old = 0; int num_linesearch; for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++) { d_diff = d_old - d; - cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta; + if(j == w_size-1 && regularize_bias == 0) + cond = -sigma*delta; + else + cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta; appxcond = xj_sq[j]*d*d + G_loss*d + cond; if(appxcond <= 0) { x = prob_col->x[j]; - while(x->index != -1) - { - b[x->index-1] += d_diff*x->value; - x++; - } + sparse_operator::axpy(d_diff, x, b); break; } @@ -1272,11 +1690,7 @@ static void solve_l1r_l2_svc( { if(w[i]==0) continue; x = prob_col->x[i]; - while(x->index != -1) - { - b[x->index-1] -= w[i]*x->value; - x++; - } + sparse_operator::axpy(-w[i], x, b); } } } @@ -1325,6 +1739,8 @@ static void solve_l1r_l2_svc( nnz++; } } + if (regularize_bias == 0) + v -= fabs(w[w_size-1]); for(j=0; j 0) v += C[GETI(j)]*b[j]*b[j]; @@ -1336,31 +1752,37 @@ static void solve_l1r_l2_svc( delete [] y; delete [] b; delete [] xj_sq; + + return iter; } -// A coordinate descent algorithm for +// A coordinate descent algorithm for // L1-regularized logistic regression problems // // min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)), // -// Given: +// Given: // x, y, Cp, Cn // eps is the stopping tolerance // // solution will be put in w // +// this function returns the number of iterations +// // See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008) +// +// To not regularize the bias (i.e., regularize_bias = 0), a constant feature = 1 +// must have been added to the original data. 
(see -B and -R option) #undef GETI #define GETI(i) (y[i]+1) // To support weights for instances, use GETI(i) (i) -static void solve_l1r_lr( - const problem *prob_col, double *w, double eps, - double Cp, double Cn) +static int solve_l1r_lr(const problem *prob_col, const parameter *param, double *w, double Cp, double Cn, double eps) { int l = prob_col->l; int w_size = prob_col->n; + int regularize_bias = param->regularize_bias; int j, s, newton_iter=0, iter=0; int max_newton_iter = 100; int max_iter = 1000; @@ -1371,9 +1793,9 @@ static void solve_l1r_lr( double nu = 1e-12; double inner_eps = 1; double sigma = 0.01; - double w_norm=0, w_norm_new; + double w_norm, w_norm_new; double z, G, H; - double Gnorm1_init; + double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration double Gmax_old = INF; double Gmax_new, Gnorm1_new; double QP_Gmax_old = INF; @@ -1395,6 +1817,10 @@ static void solve_l1r_lr( double C[3] = {Cn,0,Cp}; + // Initial w can be set here. + for(j=0; jy[j] > 0) @@ -1402,14 +1828,13 @@ static void solve_l1r_lr( else y[j] = -1; - // assume initial w is 0 - exp_wTx[j] = 1; - tau[j] = C[GETI(j)]*0.5; - D[j] = C[GETI(j)]*0.25; + exp_wTx[j] = 0; } + + w_norm = 0; for(j=0; jindex != -1) { int ind = x->index-1; + double val = x->value; + exp_wTx[ind] += w[j]*val; if(y[ind] == -1) - xjneg_sum[j] += C[GETI(ind)]*x->value; + xjneg_sum[j] += C[GETI(ind)]*val; x++; } } + if (regularize_bias == 0) + w_norm -= fabs(w[w_size-1]); + + for(j=0; j 0) - violation = Gn; - //outer-level shrinking - else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) - { - active_size--; - swap(index[s], index[active_size]); - s--; - continue; - } - } - else if(w[j] > 0) - violation = fabs(Gp); + if (j == w_size-1 && regularize_bias == 0) + violation = fabs(Grad[j]); else - violation = fabs(Gn); - + { + double Gp = Grad[j]+1; + double Gn = Grad[j]-1; + if(w[j] == 0) + { + if(Gp < 0) + violation = -Gp; + else if(Gn > 0) + violation = Gn; + //outer-level shrinking + else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } + } + else if(w[j] > 0) + violation = fabs(Gp); + else + violation = fabs(Gn); + } Gmax_new = max(Gmax_new, violation); Gnorm1_new += violation; } @@ -1512,40 +1953,48 @@ static void solve_l1r_lr( x++; } - double Gp = G+1; - double Gn = G-1; double violation = 0; - if(wpd[j] == 0) + if (j == w_size-1 && regularize_bias == 0) { - if(Gp < 0) - violation = -Gp; - else if(Gn > 0) - violation = Gn; - //inner-level shrinking - else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l) - { - QP_active_size--; - swap(index[s], index[QP_active_size]); - s--; - continue; - } + // bias term not shrunken + violation = fabs(G); + z = -G/H; } - else if(wpd[j] > 0) - violation = fabs(Gp); else - violation = fabs(Gn); + { + double Gp = G+1; + double Gn = G-1; + if(wpd[j] == 0) + { + if(Gp < 0) + violation = -Gp; + else if(Gn > 0) + violation = Gn; + //inner-level shrinking + else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l) + { + QP_active_size--; + swap(index[s], index[QP_active_size]); + s--; + continue; + } + } + else if(wpd[j] > 0) + violation = fabs(Gp); + else + violation = fabs(Gn); + // obtain solution of one-variable problem + if(Gp < H*wpd[j]) + z = -Gp/H; + else if(Gn > H*wpd[j]) + z = -Gn/H; + else + z = -wpd[j]; + } QP_Gmax_new = max(QP_Gmax_new, violation); QP_Gnorm1_new += violation; - // obtain solution of one-variable problem - if(Gp <= H*wpd[j]) - z = -Gp/H; - else if(Gn >= H*wpd[j]) - z = -Gn/H; - else - z = -wpd[j]; - if(fabs(z) < 1.0e-12) 
continue; z = min(max(z,-10.0),10.0); @@ -1553,12 +2002,7 @@ static void solve_l1r_lr( wpd[j] += z; x = prob_col->x[j]; - while(x->index != -1) - { - int ind = x->index-1; - xTd[ind] += x->value*z; - x++; - } + sparse_operator::axpy(z, x, xTd); } iter++; @@ -1591,6 +2035,8 @@ static void solve_l1r_lr( if(wpd[j] != 0) w_norm_new += fabs(wpd[j]); } + if (regularize_bias == 0) + w_norm_new -= fabs(wpd[w_size-1]); delta += (w_norm_new-w_norm); negsum_xTd = 0; @@ -1633,6 +2079,8 @@ static void solve_l1r_lr( if(wpd[j] != 0) w_norm_new += fabs(wpd[j]); } + if (regularize_bias == 0) + w_norm_new -= fabs(wpd[w_size-1]); delta *= 0.5; negsum_xTd *= 0.5; for(int i=0; ix[i]; - while(x->index != -1) - { - exp_wTx[x->index-1] += w[i]*x->value; - x++; - } + sparse_operator::axpy(w[i], x, exp_wTx); } for(int i=0; i= pivot +static int partition(feature_node *nodes, int low, int high) +{ + int i; + int index; + + swap(nodes[low + rand()%(high-low+1)], nodes[high]); // select and move pivot to the end + + index = low; + for(i = low; i < high; i++) + if (compare_feature_node(&nodes[i], &nodes[high]) == -1) + { + swap(nodes[index], nodes[i]); + index++; + } + + swap(nodes[high], nodes[index]); + return index; +} + +// rearrange nodes so that nodes[:k] contains nodes with the k smallest values. +static void quick_select_min_k(feature_node *nodes, int low, int high, int k) +{ + int pivot; + if(low == high) + return; + pivot = partition(nodes, low, high); + if(pivot == k) + return; + else if(k-1 < pivot) + return quick_select_min_k(nodes, low, pivot-1, k); + else + return quick_select_min_k(nodes, pivot+1, high, k); +} + +// A two-level coordinate descent algorithm for +// a scaled one-class SVM dual problem +// +// min_\alpha 0.5(\alpha^T Q \alpha), +// s.t. 0 <= \alpha_i <= 1 and +// e^T \alpha = \nu l +// +// where Qij = xi^T xj +// +// Given: +// x, nu +// eps is the stopping tolerance +// +// solution will be put in w and rho +// +// this function returns the number of iterations +// +// See Algorithm 7 in supplementary materials of Chou et al., SDM 2020. 
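For context outside the patch itself: a minimal sketch of how a caller might drive this new one-class solver through the public API added elsewhere in this upgrade (solver_type ONECLASS_SVM, the parameter.nu field, predict() now returning a double, and get_decfun_rho()). The toy feature values, point count, and settings below are illustrative assumptions only and are not part of the upstream sources; per the predict_values() change later in this patch, predict() reports +1 for inliers and -1 for outliers, i.e. the sign of w'x - rho.

    #include <stdio.h>
    #include "linear.h"

    int main(void)
    {
        /* four 2-feature points; index -1 terminates each row as usual */
        struct feature_node x[4][3] = {
            {{1, 0.9}, {2, 1.1}, {-1, 0}},
            {{1, 1.0}, {2, 1.0}, {-1, 0}},
            {{1, 1.1}, {2, 0.9}, {-1, 0}},
            {{1, 5.0}, {2, 5.0}, {-1, 0}},      /* likely outlier */
        };
        struct feature_node *rows[4] = {x[0], x[1], x[2], x[3]};
        double y[4] = {1, 1, 1, 1};             /* labels are ignored by the one-class solver */
        struct problem prob = {4, 2, y, rows, -1};  /* l, n, y, x, bias (<0: no bias; -s 21 rejects bias >= 0) */

        struct parameter param = {0};
        param.solver_type = ONECLASS_SVM;       /* -s 21 */
        param.eps = 0.01;                       /* default tolerance for -s 21 per the train.c help text */
        param.C = 1;
        param.nu = 0.5;                         /* roughly the fraction of data pushed to the upper bound */
        param.regularize_bias = 1;              /* 0 is only valid for the -R solvers */

        const char *err = check_parameter(&prob, &param);
        if (err) { fprintf(stderr, "%s\n", err); return 1; }

        struct model *m = train(&prob, &param);
        printf("rho = %g, prediction for last point = %g\n",
               get_decfun_rho(m), predict(m, rows[3]));

        free_and_destroy_model(&m);
        destroy_param(&param);
        return 0;
    }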
+ +static int solve_oneclass_svm(const problem *prob, const parameter *param, double *w, double *rho) +{ + int l = prob->l; + int w_size = prob->n; + double eps = param->eps; + double nu = param->nu; + int i, j, s, iter = 0; + double Gi, Gj; + double Qij, quad_coef, delta, sum; + double old_alpha_i; + double *QD = new double[l]; + double *G = new double[l]; + int *index = new int[l]; + double *alpha = new double[l]; + int max_inner_iter; + int max_iter = 1000; + int active_size = l; + + double negGmax; // max { -grad(f)_i | i in Iup } + double negGmin; // min { -grad(f)_i | i in Ilow } + // Iup = { i | alpha_i < 1 }, Ilow = { i | alpha_i > 0 } + feature_node *max_negG_of_Iup = new feature_node[l]; + feature_node *min_negG_of_Ilow = new feature_node[l]; + feature_node node; + + int n = (int)(nu*l); // # of alpha's at upper bound + for(i=0; ix[i]; + QD[i] = sparse_operator::nrm2_sq(xi); + sparse_operator::axpy(alpha[i], xi, w); + + index[i] = i; + } + + while (iter < max_iter) + { + negGmax = -INF; + negGmin = INF; + + for (s=0; sx[i]; + G[i] = sparse_operator::dot(w, xi); + if (alpha[i] < 1) + negGmax = max(negGmax, -G[i]); + if (alpha[i] > 0) + negGmin = min(negGmin, -G[i]); + } + + if (negGmax - negGmin < eps) + { + if (active_size == l) + break; + else + { + active_size = l; + info("*"); + continue; + } + } + + for(s=0; s negGmax) || + (alpha[i] == 0 && -G[i] < negGmin)) + { + active_size--; + swap(index[s], index[active_size]); + s--; + } + } + + max_inner_iter = max(active_size/10, 1); + int len_Iup = 0; + int len_Ilow = 0; + for(s=0; s 0) + { + min_negG_of_Ilow[len_Ilow] = node; + len_Ilow++; + } + } + max_inner_iter = min(max_inner_iter, min(len_Iup, len_Ilow)); + + quick_select_min_k(max_negG_of_Iup, 0, len_Iup-1, len_Iup-max_inner_iter); + qsort(&(max_negG_of_Iup[len_Iup-max_inner_iter]), max_inner_iter, sizeof(struct feature_node), compare_feature_node); + + quick_select_min_k(min_negG_of_Ilow, 0, len_Ilow-1, max_inner_iter); + qsort(min_negG_of_Ilow, max_inner_iter, sizeof(struct feature_node), compare_feature_node); + + for (s=0; sx[i]; + feature_node const * xj = prob->x[j]; + + Gi = sparse_operator::dot(w, xi); + Gj = sparse_operator::dot(w, xj); + + int violating_pair = 0; + if (alpha[i] < 1 && alpha[j] > 0 && -Gj + 1e-12 < -Gi) + violating_pair = 1; + else + if (alpha[i] > 0 && alpha[j] < 1 && -Gi + 1e-12 < -Gj) + violating_pair = 1; + if (violating_pair == 0) + continue; + + Qij = sparse_operator::sparse_dot(xi, xj); + quad_coef = QD[i] + QD[j] - 2*Qij; + if(quad_coef <= 0) + quad_coef = 1e-12; + delta = (Gi - Gj) / quad_coef; + old_alpha_i = alpha[i]; + sum = alpha[i] + alpha[j]; + alpha[i] = alpha[i] - delta; + alpha[j] = alpha[j] + delta; + if (sum > 1) + { + if (alpha[i] > 1) + { + alpha[i] = 1; + alpha[j] = sum - 1; + } + } + else + { + if (alpha[j] < 0) + { + alpha[j] = 0; + alpha[i] = sum; + } + } + if (sum > 1) + { + if (alpha[j] > 1) + { + alpha[j] = 1; + alpha[i] = sum - 1; + } + } + else + { + if (alpha[i] < 0) + { + alpha[i] = 0; + alpha[j] = sum; + } + } + delta = alpha[i] - old_alpha_i; + sparse_operator::axpy(delta, xi, w); + sparse_operator::axpy(-delta, xj, w); + } + iter++; + if (iter % 10 == 0) + info("."); + } + info("\noptimization finished, #iter = %d\n",iter); + if (iter >= max_iter) + info("\nWARNING: reaching max number of iterations\n\n"); + + // calculate object value + double v = 0; + for(i=0; i 0) + ++nSV; + } + info("Objective value = %lf\n", v/2); + info("nSV = %d\n", nSV); + + // calculate rho + double nr_free = 0; + double ub = INF, lb = 
-INF, sum_free = 0; + for(i=0; ix[i]); + if (alpha[i] == 1) + lb = max(lb, G); + else if (alpha[i] == 0) + ub = min(ub, G); + else + { + ++nr_free; + sum_free += G; + } + } + + if (nr_free > 0) + *rho = sum_free/nr_free; + else + *rho = (ub + lb)/2; + info("rho = %lf\n", *rho); + + delete [] QD; + delete [] G; + delete [] index; + delete [] alpha; + delete [] max_negG_of_Iup; + delete [] min_negG_of_Ilow; + + return iter; } // transpose matrix X from row format to column format @@ -1713,12 +2470,12 @@ static void transpose(const problem *prob, feature_node **x_space_ret, problem * int i; int l = prob->l; int n = prob->n; - int nnz = 0; - int *col_ptr = new int[n+1]; + size_t nnz = 0; + size_t *col_ptr = new size_t [n+1]; feature_node *x_space; prob_col->l = l; prob_col->n = n; - prob_col->y = new int[l]; + prob_col->y = new double[l]; prob_col->x = new feature_node*[n]; for(i=0; iy[i]; + int this_label = (int)prob->y[i]; int j; for(j=0;jeps; - int pos = 0; - int neg = 0; - for(int i=0;il;i++) - if(prob->y[i]==+1) - pos++; - neg = prob->l - pos; + int solver_type = param->solver_type; + int dual_solver_max_iter = 300; + int iter; - function *fun_obj=NULL; - switch(param->solver_type) + bool is_regression = (solver_type==L2R_L2LOSS_SVR || + solver_type==L2R_L1LOSS_SVR_DUAL || + solver_type==L2R_L2LOSS_SVR_DUAL); + + // Some solvers use Cp,Cn but not C array; extensions possible but no plan for now + double *C = new double[prob->l]; + double primal_solver_tol = param->eps; + if(is_regression) + { + for(int i=0;il;i++) + C[i] = param->C; + } + else + { + int pos = 0; + for(int i=0;il;i++) + { + if(prob->y[i] > 0) + { + pos++; + C[i] = Cp; + } + else + C[i] = Cn; + } + int neg = prob->l - pos; + primal_solver_tol = param->eps*max(min(pos,neg), 1)/prob->l; + } + + switch(solver_type) { case L2R_LR: { - fun_obj=new l2r_lr_fun(prob, Cp, Cn); - TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l); - tron_obj.set_print_string(liblinear_print_string); - tron_obj.tron(w); - delete fun_obj; + l2r_lr_fun fun_obj(prob, param, C); + NEWTON newton_obj(&fun_obj, primal_solver_tol); + newton_obj.set_print_string(liblinear_print_string); + newton_obj.newton(w); break; } case L2R_L2LOSS_SVC: { - fun_obj=new l2r_l2_svc_fun(prob, Cp, Cn); - TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l); - tron_obj.set_print_string(liblinear_print_string); - tron_obj.tron(w); - delete fun_obj; + l2r_l2_svc_fun fun_obj(prob, param, C); + NEWTON newton_obj(&fun_obj, primal_solver_tol); + newton_obj.set_print_string(liblinear_print_string); + newton_obj.newton(w); break; } case L2R_L2LOSS_SVC_DUAL: - solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL); + { + iter = solve_l2r_l1l2_svc(prob, param, w, Cp, Cn, dual_solver_max_iter); + if(iter >= dual_solver_max_iter) + { + info("\nWARNING: reaching max number of iterations\nSwitching to use -s 2\n\n"); + // primal_solver_tol obtained from eps for dual may be too loose + primal_solver_tol *= 0.1; + l2r_l2_svc_fun fun_obj(prob, param, C); + NEWTON newton_obj(&fun_obj, primal_solver_tol); + newton_obj.set_print_string(liblinear_print_string); + newton_obj.newton(w); + } break; + } case L2R_L1LOSS_SVC_DUAL: - solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL); + { + iter = solve_l2r_l1l2_svc(prob, param, w, Cp, Cn, dual_solver_max_iter); + if(iter >= dual_solver_max_iter) + info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n"); break; + } case L1R_L2LOSS_SVC: { problem prob_col; feature_node *x_space = NULL; transpose(prob, 
&x_space ,&prob_col); - solve_l1r_l2_svc(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn); + solve_l1r_l2_svc(&prob_col, param, w, Cp, Cn, primal_solver_tol); delete [] prob_col.y; delete [] prob_col.x; delete [] x_space; @@ -1875,21 +2688,243 @@ static void train_one(const problem *prob, const parameter *param, double *w, do problem prob_col; feature_node *x_space = NULL; transpose(prob, &x_space ,&prob_col); - solve_l1r_lr(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn); + solve_l1r_lr(&prob_col, param, w, Cp, Cn, primal_solver_tol); delete [] prob_col.y; delete [] prob_col.x; delete [] x_space; break; } case L2R_LR_DUAL: - solve_l2r_lr_dual(prob, w, eps, Cp, Cn); + { + iter = solve_l2r_lr_dual(prob, param, w, Cp, Cn, dual_solver_max_iter); + if(iter >= dual_solver_max_iter) + { + info("\nWARNING: reaching max number of iterations\nSwitching to use -s 0\n\n"); + // primal_solver_tol obtained from eps for dual may be too loose + primal_solver_tol *= 0.1; + l2r_lr_fun fun_obj(prob, param, C); + NEWTON newton_obj(&fun_obj, primal_solver_tol); + newton_obj.set_print_string(liblinear_print_string); + newton_obj.newton(w); + } break; + } + case L2R_L2LOSS_SVR: + { + l2r_l2_svr_fun fun_obj(prob, param, C); + NEWTON newton_obj(&fun_obj, primal_solver_tol); + newton_obj.set_print_string(liblinear_print_string); + newton_obj.newton(w); + break; + } + case L2R_L1LOSS_SVR_DUAL: + { + iter = solve_l2r_l1l2_svr(prob, param, w, dual_solver_max_iter); + if(iter >= dual_solver_max_iter) + info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster (also see FAQ)\n\n"); + + break; + } + case L2R_L2LOSS_SVR_DUAL: + { + iter = solve_l2r_l1l2_svr(prob, param, w, dual_solver_max_iter); + if(iter >= dual_solver_max_iter) + { + info("\nWARNING: reaching max number of iterations\nSwitching to use -s 11\n\n"); + // primal_solver_tol obtained from eps for dual may be too loose + primal_solver_tol *= 0.001; + l2r_l2_svr_fun fun_obj(prob, param, C); + NEWTON newton_obj(&fun_obj, primal_solver_tol); + newton_obj.set_print_string(liblinear_print_string); + newton_obj.newton(w); + } + break; + } default: - fprintf(stderr, "Error: unknown solver_type\n"); + fprintf(stderr, "ERROR: unknown solver_type\n"); break; } + + delete[] C; } +// Calculate the initial C for parameter selection +static double calc_start_C(const problem *prob, const parameter *param) +{ + int i; + double xTx, max_xTx; + max_xTx = 0; + for(i=0; il; i++) + { + xTx = 0; + feature_node *xi=prob->x[i]; + while(xi->index != -1) + { + double val = xi->value; + xTx += val*val; + xi++; + } + if(xTx > max_xTx) + max_xTx = xTx; + } + + double min_C = 1.0; + if(param->solver_type == L2R_LR) + min_C = 1.0 / (prob->l * max_xTx); + else if(param->solver_type == L2R_L2LOSS_SVC) + min_C = 1.0 / (2 * prob->l * max_xTx); + else if(param->solver_type == L2R_L2LOSS_SVR) + { + double sum_y, loss, y_abs; + double delta2 = 0.1; + sum_y = 0, loss = 0; + for(i=0; il; i++) + { + y_abs = fabs(prob->y[i]); + sum_y += y_abs; + loss += max(y_abs - param->p, 0.0) * max(y_abs - param->p, 0.0); + } + if(loss > 0) + min_C = delta2 * delta2 * loss / (8 * sum_y * sum_y * max_xTx); + else + min_C = INF; + } + + return pow( 2, floor(log(min_C) / log(2.0)) ); +} + +static double calc_max_p(const problem *prob) +{ + int i; + double max_p = 0.0; + for(i = 0; i < prob->l; i++) + max_p = max(max_p, fabs(prob->y[i])); + + return max_p; +} + +static void find_parameter_C(const problem *prob, parameter *param_tmp, double start_C, double max_C, double *best_C, double *best_score, 
const int *fold_start, const int *perm, const problem *subprob, int nr_fold) +{ + // variables for CV + int i; + double *target = Malloc(double, prob->l); + + // variables for warm start + double ratio = 2; + double **prev_w = Malloc(double*, nr_fold); + for(i = 0; i < nr_fold; i++) + prev_w[i] = NULL; + int num_unchanged_w = 0; + void (*default_print_string) (const char *) = liblinear_print_string; + + if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC) + *best_score = 0.0; + else if(param_tmp->solver_type == L2R_L2LOSS_SVR) + *best_score = INF; + *best_C = start_C; + + param_tmp->C = start_C; + while(param_tmp->C <= max_C) + { + //Output disabled for running CV at a particular C + set_print_string_function(&print_null); + + for(i=0; iinit_sol = prev_w[i]; + struct model *submodel = train(&subprob[i],param_tmp); + + int total_w_size; + if(submodel->nr_class == 2) + total_w_size = subprob[i].n; + else + total_w_size = subprob[i].n * submodel->nr_class; + + if(prev_w[i] == NULL) + { + prev_w[i] = Malloc(double, total_w_size); + for(j=0; jw[j]; + } + else if(num_unchanged_w >= 0) + { + double norm_w_diff = 0; + for(j=0; jw[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]); + prev_w[i][j] = submodel->w[j]; + } + norm_w_diff = sqrt(norm_w_diff); + + if(norm_w_diff > 1e-15) + num_unchanged_w = -1; + } + else + { + for(j=0; jw[j]; + } + + for(j=begin; jx[perm[j]]); + + free_and_destroy_model(&submodel); + } + set_print_string_function(default_print_string); + + if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC) + { + int total_correct = 0; + for(i=0; il; i++) + if(target[i] == prob->y[i]) + ++total_correct; + double current_rate = (double)total_correct/prob->l; + if(current_rate > *best_score) + { + *best_C = param_tmp->C; + *best_score = current_rate; + } + + info("log2c=%7.2f\trate=%g\n",log(param_tmp->C)/log(2.0),100.0*current_rate); + } + else if(param_tmp->solver_type == L2R_L2LOSS_SVR) + { + double total_error = 0.0; + for(i=0; il; i++) + { + double y = prob->y[i]; + double v = target[i]; + total_error += (v-y)*(v-y); + } + double current_error = total_error/prob->l; + if(current_error < *best_score) + { + *best_C = param_tmp->C; + *best_score = current_error; + } + + info("log2c=%7.2f\tp=%7.2f\tMean squared error=%g\n",log(param_tmp->C)/log(2.0),param_tmp->p,current_error); + } + + num_unchanged_w++; + if(num_unchanged_w == 5) + break; + param_tmp->C = param_tmp->C*ratio; + } + + if(param_tmp->C > max_C) + info("WARNING: maximum C reached.\n"); + free(target); + for(i=0; iparam = *param; model_->bias = prob->bias; - int nr_class; - int *label = NULL; - int *start = NULL; - int *count = NULL; - int *perm = Malloc(int,l); - - // group training data of the same class - group_classes(prob,&nr_class,&label,&start,&count,perm); - - model_->nr_class=nr_class; - model_->label = Malloc(int,nr_class); - for(i=0;ilabel[i] = label[i]; - - // calculate weighted C - double *weighted_C = Malloc(double, nr_class); - for(i=0;iC; - for(i=0;inr_weight;i++) + if(check_regression_model(model_)) { - for(j=0;jweight_label[i] == label[j]) - break; - if(j == nr_class) - fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]); + model_->w = Malloc(double, w_size); + + if(param->init_sol != NULL) + for(i=0;iw[i] = param->init_sol[i]; else - weighted_C[j] *= param->weight[i]; + for(i=0;iw[i] = 0; + + model_->nr_class = 2; + model_->label = NULL; + train_one(prob, param, model_->w, 0, 0); } - - // constructing the 
subproblem - feature_node **x = Malloc(feature_node *,l); - for(i=0;ix[perm[i]]; - - int k; - problem sub_prob; - sub_prob.l = l; - sub_prob.n = n; - sub_prob.x = Malloc(feature_node *,sub_prob.l); - sub_prob.y = Malloc(int,sub_prob.l); - - for(k=0; ksolver_type == MCSVM_CS) + else if(check_oneclass_model(model_)) { - model_->w=Malloc(double, n*nr_class); - for(i=0;ieps); - Solver.Solve(model_->w); + model_->w = Malloc(double, w_size); + model_->nr_class = 2; + model_->label = NULL; + solve_oneclass_svm(prob, param, model_->w, &(model_->rho)); } else { - if(nr_class == 2) + int nr_class; + int *label = NULL; + int *start = NULL; + int *count = NULL; + int *perm = Malloc(int,l); + + // group training data of the same class + group_classes(prob,&nr_class,&label,&start,&count,perm); + + model_->nr_class=nr_class; + model_->label = Malloc(int,nr_class); + for(i=0;ilabel[i] = label[i]; + + // calculate weighted C + double *weighted_C = Malloc(double, nr_class); + for(i=0;iC; + for(i=0;inr_weight;i++) { - model_->w=Malloc(double, w_size); + for(j=0;jweight_label[i] == label[j]) + break; + if(j == nr_class) + fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]); + else + weighted_C[j] *= param->weight[i]; + } - int e0 = start[0]+count[0]; - k=0; - for(; kx[perm[i]]; - train_one(&sub_prob, param, &model_->w[0], weighted_C[0], weighted_C[1]); + int k; + problem sub_prob; + sub_prob.l = l; + sub_prob.n = n; + sub_prob.x = Malloc(feature_node *,sub_prob.l); + sub_prob.y = Malloc(double,sub_prob.l); + + for(k=0; ksolver_type == MCSVM_CS) + { + model_->w=Malloc(double, n*nr_class); + for(i=0;ieps); + Solver.Solve(model_->w); } else { - model_->w=Malloc(double, w_size*nr_class); - double *w=Malloc(double, w_size); - for(i=0;iw=Malloc(double, w_size); + int e0 = start[0]+count[0]; k=0; - for(; kC); + if(param->init_sol != NULL) + for(i=0;iw[i] = param->init_sol[i]; + else + for(i=0;iw[i] = 0; - for(int j=0;jw[j*nr_class+i] = w[j]; + train_one(&sub_prob, param, model_->w, weighted_C[0], weighted_C[1]); } - free(w); + else + { + model_->w=Malloc(double, w_size*nr_class); + double *w=Malloc(double, w_size); + for(i=0;iinit_sol != NULL) + for(j=0;jinit_sol[j*nr_class+i]; + else + for(j=0;jC); + + for(j=0;jw[j*nr_class+i] = w[j]; + } + free(w); + } + } + free(x); + free(label); + free(start); + free(count); + free(perm); + free(sub_prob.x); + free(sub_prob.y); + free(weighted_C); } - - free(x); - free(label); - free(start); - free(count); - free(perm); - free(sub_prob.x); - free(sub_prob.y); - free(weighted_C); return model_; } -void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target) +void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target) { int i; - int *fold_start = Malloc(int,nr_fold+1); + int *fold_start; int l = prob->l; int *perm = Malloc(int,l); - + if (nr_fold > l) + { + nr_fold = l; + fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n"); + } + fold_start = Malloc(int,nr_fold+1); for(i=0;in; subprob.l = l-(end-begin); subprob.x = Malloc(struct feature_node*,subprob.l); - subprob.y = Malloc(int,subprob.l); + subprob.y = Malloc(double,subprob.l); k=0; for(j=0;jl; + int *perm = Malloc(int, l); + struct problem *subprob = Malloc(problem,nr_fold); + + if (nr_fold > l) + { + nr_fold = l; + fprintf(stderr,"WARNING: # folds > # data. 
Will use # folds = # data instead (i.e., leave-one-out cross validation)\n"); + } + fold_start = Malloc(int,nr_fold+1); + for(i=0;ibias; + subprob[i].n = prob->n; + subprob[i].l = l-(end-begin); + subprob[i].x = Malloc(struct feature_node*,subprob[i].l); + subprob[i].y = Malloc(double,subprob[i].l); + + k=0; + for(j=0;jx[perm[j]]; + subprob[i].y[k] = prob->y[perm[j]]; + ++k; + } + for(j=end;jx[perm[j]]; + subprob[i].y[k] = prob->y[perm[j]]; + ++k; + } + } + + struct parameter param_tmp = *param; + *best_p = -1; + if(param->solver_type == L2R_LR || param->solver_type == L2R_L2LOSS_SVC) + { + if(start_C <= 0) + start_C = calc_start_C(prob, ¶m_tmp); + double max_C = 1024; + start_C = min(start_C, max_C); + double best_C_tmp, best_score_tmp; + + find_parameter_C(prob, ¶m_tmp, start_C, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold); + + *best_C = best_C_tmp; + *best_score = best_score_tmp; + } + else if(param->solver_type == L2R_L2LOSS_SVR) + { + double max_p = calc_max_p(prob); + int num_p_steps = 20; + double max_C = 1048576; + *best_score = INF; + + i = num_p_steps-1; + if(start_p > 0) + i = min((int)(start_p/(max_p/num_p_steps)), i); + for(; i >= 0; i--) + { + param_tmp.p = i*max_p/num_p_steps; + double start_C_tmp; + if(start_C <= 0) + start_C_tmp = calc_start_C(prob, ¶m_tmp); + else + start_C_tmp = start_C; + start_C_tmp = min(start_C_tmp, max_C); + double best_C_tmp, best_score_tmp; + + find_parameter_C(prob, ¶m_tmp, start_C_tmp, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold); + + if(best_score_tmp < *best_score) + { + *best_p = param_tmp.p; + *best_C = best_C_tmp; + *best_score = best_score_tmp; + } + } + } + + free(fold_start); + free(perm); + for(i=0; ivalue; } + if(check_oneclass_model(model_)) + dec_values[0] -= model_->rho; if(nr_class==2) - return (dec_values[0]>0)?model_->label[0]:model_->label[1]; + { + if(check_regression_model(model_)) + return dec_values[0]; + else if(check_oneclass_model(model_)) + return (dec_values[0]>0)?1:-1; + else + return (dec_values[0]>0)?model_->label[0]:model_->label[1]; + } else { int dec_max_idx = 0; @@ -2110,15 +3308,15 @@ int predict_values(const struct model *model_, const struct feature_node *x, dou } } -int predict(const model *model_, const feature_node *x) +double predict(const model *model_, const feature_node *x) { double *dec_values = Malloc(double, model_->nr_class); - int label=predict_values(model_, x, dec_values); + double label=predict_values(model_, x, dec_values); free(dec_values); return label; } -int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates) +double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates) { if(check_probability_model(model_)) { @@ -2130,7 +3328,7 @@ int predict_probability(const struct model *model_, const struct feature_node *x else nr_w = nr_class; - int label=predict_values(model_, x, prob_estimates); + double label=predict_values(model_, x, prob_estimates); for(i=0;inr_class==2 && model_->param.solver_type != MCSVM_CS) nr_w=1; @@ -2181,28 +3390,62 @@ int save_model(const char *model_file_name, const struct model *model_) fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]); fprintf(fp, "nr_class %d\n", model_->nr_class); - fprintf(fp, "label"); - for(i=0; inr_class; i++) - fprintf(fp, " %d", model_->label[i]); - fprintf(fp, "\n"); + + if(model_->label) + { + fprintf(fp, "label"); + for(i=0; inr_class; i++) + fprintf(fp, " %d", 
model_->label[i]); + fprintf(fp, "\n"); + } fprintf(fp, "nr_feature %d\n", nr_feature); - fprintf(fp, "bias %.16g\n", model_->bias); + fprintf(fp, "bias %.17g\n", model_->bias); + + if(check_oneclass_model(model_)) + fprintf(fp, "rho %.17g\n", model_->rho); fprintf(fp, "w\n"); for(i=0; iw[i*nr_w+j]); + fprintf(fp, "%.17g ", model_->w[i*nr_w+j]); fprintf(fp, "\n"); } + setlocale(LC_ALL, old_locale); + free(old_locale); + if (ferror(fp) != 0 || fclose(fp) != 0) return -1; else return 0; } +// +// FSCANF helps to handle fscanf failures. +// Its do-while block avoids the ambiguity when +// if (...) +// FSCANF(); +// is used +// +#define FSCANF(_stream, _format, _var)do\ +{\ + if (fscanf(_stream, _format, _var) != 1)\ + {\ + fprintf(stderr, "ERROR: fscanf failed to read the model\n");\ + EXIT_LOAD_MODEL()\ + }\ +}while(0) +// EXIT_LOAD_MODEL should NOT end with a semicolon. +#define EXIT_LOAD_MODEL()\ +{\ + setlocale(LC_ALL, old_locale);\ + free(model_->label);\ + free(model_);\ + free(old_locale);\ + return NULL;\ +} struct model *load_model(const char *model_file_name) { FILE *fp = fopen(model_file_name,"r"); @@ -2213,18 +3456,31 @@ struct model *load_model(const char *model_file_name) int n; int nr_class; double bias; + double rho; model *model_ = Malloc(model,1); parameter& param = model_->param; + // parameters for training only won't be assigned, but arrays are assigned as NULL for safety + param.nr_weight = 0; + param.weight_label = NULL; + param.weight = NULL; + param.init_sol = NULL; model_->label = NULL; + char *old_locale = setlocale(LC_ALL, NULL); + if (old_locale) + { + old_locale = strdup(old_locale); + } + setlocale(LC_ALL, "C"); + char cmd[81]; while(1) { - fscanf(fp,"%80s",cmd); + FSCANF(fp,"%80s",cmd); if(strcmp(cmd,"solver_type")==0) { - fscanf(fp,"%80s",cmd); + FSCANF(fp,"%80s",cmd); int i; for(i=0;solver_type_table[i];i++) { @@ -2237,26 +3493,29 @@ struct model *load_model(const char *model_file_name) if(solver_type_table[i] == NULL) { fprintf(stderr,"unknown solver type.\n"); - free(model_->label); - free(model_); - return NULL; + EXIT_LOAD_MODEL() } } else if(strcmp(cmd,"nr_class")==0) { - fscanf(fp,"%d",&nr_class); + FSCANF(fp,"%d",&nr_class); model_->nr_class=nr_class; } else if(strcmp(cmd,"nr_feature")==0) { - fscanf(fp,"%d",&nr_feature); + FSCANF(fp,"%d",&nr_feature); model_->nr_feature=nr_feature; } else if(strcmp(cmd,"bias")==0) { - fscanf(fp,"%lf",&bias); + FSCANF(fp,"%lf",&bias); model_->bias=bias; } + else if(strcmp(cmd,"rho")==0) + { + FSCANF(fp,"%lf",&rho); + model_->rho=rho; + } else if(strcmp(cmd,"w")==0) { break; @@ -2266,13 +3525,12 @@ struct model *load_model(const char *model_file_name) int nr_class = model_->nr_class; model_->label = Malloc(int,nr_class); for(int i=0;ilabel[i]); + FSCANF(fp,"%d",&model_->label[i]); } else { fprintf(stderr,"unknown text in model file: [%s]\n",cmd); - free(model_); - return NULL; + EXIT_LOAD_MODEL() } } @@ -2293,9 +3551,12 @@ struct model *load_model(const char *model_file_name) { int j; for(j=0; jw[i*nr_w+j]); - fscanf(fp, "\n"); + FSCANF(fp, "%lf ", &model_->w[i*nr_w+j]); } + + setlocale(LC_ALL, old_locale); + free(old_locale); + if (ferror(fp) != 0 || fclose(fp) != 0) return NULL; return model_; @@ -2318,12 +3579,76 @@ void get_labels(const model *model_, int* label) label[i] = model_->label[i]; } +// use inline here for better performance (around 20% faster than the non-inline one) +static inline double get_w_value(const struct model *model_, int idx, int label_idx) +{ + int nr_class = model_->nr_class; + int 
solver_type = model_->param.solver_type; + const double *w = model_->w; + + if(idx < 0 || idx > model_->nr_feature) + return 0; + if(check_regression_model(model_) || check_oneclass_model(model_)) + return w[idx]; + else + { + if(label_idx < 0 || label_idx >= nr_class) + return 0; + if(nr_class == 2 && solver_type != MCSVM_CS) + { + if(label_idx == 0) + return w[idx]; + else + return -w[idx]; + } + else + return w[idx*nr_class+label_idx]; + } +} + +// feat_idx: starting from 1 to nr_feature +// label_idx: starting from 0 to nr_class-1 for classification models; +// for regression and one-class SVM models, label_idx is +// ignored. +double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx) +{ + if(feat_idx > model_->nr_feature) + return 0; + return get_w_value(model_, feat_idx-1, label_idx); +} + +double get_decfun_bias(const struct model *model_, int label_idx) +{ + if(check_oneclass_model(model_)) + { + fprintf(stderr, "ERROR: get_decfun_bias can not be called for a one-class SVM model\n"); + return 0; + } + int bias_idx = model_->nr_feature; + double bias = model_->bias; + if(bias <= 0) + return 0; + else + return bias*get_w_value(model_, bias_idx, label_idx); +} + +double get_decfun_rho(const struct model *model_) +{ + if(check_oneclass_model(model_)) + return model_->rho; + else + { + fprintf(stderr, "ERROR: get_decfun_rho can be called only for a one-class SVM model\n"); + return 0; + } +} + void free_model_content(struct model *model_ptr) { - if(model_ptr->w != NULL) - free(model_ptr->w); - if(model_ptr->label != NULL) - free(model_ptr->label); + free(model_ptr->w); + model_ptr->w = NULL; + free(model_ptr->label); + model_ptr->label = NULL; } void free_and_destroy_model(struct model **model_ptr_ptr) @@ -2333,15 +3658,18 @@ void free_and_destroy_model(struct model **model_ptr_ptr) { free_model_content(model_ptr); free(model_ptr); + *model_ptr_ptr = NULL; } } void destroy_param(parameter* param) { - if(param->weight_label != NULL) - free(param->weight_label); - if(param->weight != NULL) - free(param->weight); + free(param->weight_label); + param->weight_label = NULL; + free(param->weight); + param->weight = NULL; + free(param->init_sol); + param->init_sol = NULL; } const char *check_parameter(const problem *prob, const parameter *param) @@ -2352,6 +3680,24 @@ const char *check_parameter(const problem *prob, const parameter *param) if(param->C <= 0) return "C <= 0"; + if(param->p < 0 && param->solver_type == L2R_L2LOSS_SVR) + return "p < 0"; + + if(prob->bias >= 0 && param->solver_type == ONECLASS_SVM) + return "prob->bias >=0, but this is ignored in ONECLASS_SVM"; + + if(param->regularize_bias == 0) + { + if(prob->bias != 1.0) + return "To not regularize bias, must specify -B 1 along with -R"; + if(param->solver_type != L2R_LR + && param->solver_type != L2R_L2LOSS_SVC + && param->solver_type != L1R_L2LOSS_SVC + && param->solver_type != L1R_LR + && param->solver_type != L2R_L2LOSS_SVR) + return "-R option supported only for solver L2R_LR, L2R_L2LOSS_SVC, L1R_L2LOSS_SVC, L1R_LR, and L2R_L2LOSS_SVR"; + } + if(param->solver_type != L2R_LR && param->solver_type != L2R_L2LOSS_SVC_DUAL && param->solver_type != L2R_L2LOSS_SVC @@ -2359,9 +3705,19 @@ const char *check_parameter(const problem *prob, const parameter *param) && param->solver_type != MCSVM_CS && param->solver_type != L1R_L2LOSS_SVC && param->solver_type != L1R_LR - && param->solver_type != L2R_LR_DUAL) + && param->solver_type != L2R_LR_DUAL + && param->solver_type != L2R_L2LOSS_SVR + && param->solver_type != 
L2R_L2LOSS_SVR_DUAL + && param->solver_type != L2R_L1LOSS_SVR_DUAL + && param->solver_type != ONECLASS_SVM) return "unknown solver type"; + if(param->init_sol != NULL + && param->solver_type != L2R_LR + && param->solver_type != L2R_L2LOSS_SVC + && param->solver_type != L2R_L2LOSS_SVR) + return "Initial-solution specification supported only for solvers L2R_LR, L2R_L2LOSS_SVC, and L2R_L2LOSS_SVR"; + return NULL; } @@ -2372,9 +3728,21 @@ int check_probability_model(const struct model *model_) model_->param.solver_type==L1R_LR); } +int check_regression_model(const struct model *model_) +{ + return (model_->param.solver_type==L2R_L2LOSS_SVR || + model_->param.solver_type==L2R_L1LOSS_SVR_DUAL || + model_->param.solver_type==L2R_L2LOSS_SVR_DUAL); +} + +int check_oneclass_model(const struct model *model_) +{ + return model_->param.solver_type == ONECLASS_SVM; +} + void set_print_string_function(void (*print_func)(const char*)) { - if (print_func == NULL) + if (print_func == NULL) liblinear_print_string = &print_string_stdout; else liblinear_print_string = print_func; diff --git a/liblinear/linear.def b/liblinear/linear.def index 2425996ce..c3b8d5174 100644 --- a/liblinear/linear.def +++ b/liblinear/linear.def @@ -16,3 +16,9 @@ EXPORTS check_parameter @14 check_probability_model @15 set_print_string_function @16 + get_decfun_coef @17 + get_decfun_bias @18 + check_regression_model @19 + find_parameters @20 + get_decfun_rho @21 + check_oneclass_model @22 diff --git a/liblinear/linear.h b/liblinear/linear.h index 2a1aa28d2..966ac4c55 100644 --- a/liblinear/linear.h +++ b/liblinear/linear.h @@ -1,10 +1,14 @@ #ifndef _LIBLINEAR_H #define _LIBLINEAR_H +#define LIBLINEAR_VERSION 247 + #ifdef __cplusplus extern "C" { #endif +extern int liblinear_version; + struct feature_node { int index; @@ -14,41 +18,47 @@ struct feature_node struct problem { int l, n; - int *y; + double *y; struct feature_node **x; - double bias; /* < 0 if no bias term */ + double bias; /* < 0 if no bias term */ }; -enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL }; /* solver_type */ +enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR = 11, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL, ONECLASS_SVM = 21 }; /* solver_type */ struct parameter { int solver_type; /* these are for training only */ - double eps; /* stopping criteria */ + double eps; /* stopping tolerance */ double C; int nr_weight; int *weight_label; double* weight; + double p; + double nu; + double *init_sol; + int regularize_bias; }; struct model { struct parameter param; - int nr_class; /* number of classes */ + int nr_class; /* number of classes */ int nr_feature; double *w; - int *label; /* label of each class */ + int *label; /* label of each class */ double bias; + double rho; /* one-class SVM only */ }; struct model* train(const struct problem *prob, const struct parameter *param); -void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, int *target); +void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target); +void find_parameters(const struct problem *prob, const struct parameter *param, int nr_fold, double start_C, double start_p, double *best_C, double *best_p, double *best_score); -int predict_values(const struct model *model_, const struct feature_node *x, double* dec_values); -int predict(const struct model *model_, const 
struct feature_node *x); -int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates); +double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values); +double predict(const struct model *model_, const struct feature_node *x); +double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates); int save_model(const char *model_file_name, const struct model *model_); struct model *load_model(const char *model_file_name); @@ -56,6 +66,9 @@ struct model *load_model(const char *model_file_name); int get_nr_feature(const struct model *model_); int get_nr_class(const struct model *model_); void get_labels(const struct model *model_, int* label); +double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx); +double get_decfun_bias(const struct model *model_, int label_idx); +double get_decfun_rho(const struct model *model_); void free_model_content(struct model *model_ptr); void free_and_destroy_model(struct model **model_ptr_ptr); @@ -63,6 +76,8 @@ void destroy_param(struct parameter *param); const char *check_parameter(const struct problem *prob, const struct parameter *param); int check_probability_model(const struct model *model); +int check_regression_model(const struct model *model); +int check_oneclass_model(const struct model *model); void set_print_string_function(void (*print_func) (const char*)); #ifdef __cplusplus diff --git a/liblinear/newton.cpp b/liblinear/newton.cpp new file mode 100644 index 000000000..0fe3ccffc --- /dev/null +++ b/liblinear/newton.cpp @@ -0,0 +1,251 @@ +#include +#include +#include +#include +#include "newton.h" + +#ifndef min +template static inline T min(T x,T y) { return (x static inline T max(T x,T y) { return (x>y)?x:y; } +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +extern double dnrm2_(int *, double *, int *); +extern double ddot_(int *, double *, int *, double *, int *); +extern int daxpy_(int *, double *, double *, int *, double *, int *); +extern int dscal_(int *, double *, double *, int *); + +#ifdef __cplusplus +} +#endif + +static void default_print(const char *buf) +{ + fputs(buf,stdout); + fflush(stdout); +} + +// On entry *f must be the function value of w +// On exit w is updated and *f is the new function value +double function::linesearch_and_update(double *w, double *s, double *f, double *g, double alpha) +{ + double gTs = 0; + double eta = 0.01; + int n = get_nr_variable(); + int max_num_linesearch = 20; + double *w_new = new double[n]; + double fold = *f; + + for (int i=0;i= max_num_linesearch) + { + *f = fold; + return 0; + } + else + memcpy(w, w_new, sizeof(double)*n); + + delete [] w_new; + return alpha; +} + +void NEWTON::info(const char *fmt,...) 
+{ + char buf[BUFSIZ]; + va_list ap; + va_start(ap,fmt); + vsprintf(buf,fmt,ap); + va_end(ap); + (*newton_print_string)(buf); +} + +NEWTON::NEWTON(const function *fun_obj, double eps, double eps_cg, int max_iter) +{ + this->fun_obj=const_cast(fun_obj); + this->eps=eps; + this->eps_cg=eps_cg; + this->max_iter=max_iter; + newton_print_string = default_print; +} + +NEWTON::~NEWTON() +{ +} + +void NEWTON::newton(double *w) +{ + int n = fun_obj->get_nr_variable(); + int i, cg_iter; + double step_size; + double f, fold, actred; + double init_step_size = 1; + int search = 1, iter = 1, inc = 1; + double *s = new double[n]; + double *r = new double[n]; + double *g = new double[n]; + + const double alpha_pcg = 0.01; + double *M = new double[n]; + + // calculate gradient norm at w=0 for stopping condition. + double *w0 = new double[n]; + for (i=0; ifun(w0); + fun_obj->grad(w0, g); + double gnorm0 = dnrm2_(&n, g, &inc); + delete [] w0; + + f = fun_obj->fun(w); + fun_obj->grad(w, g); + double gnorm = dnrm2_(&n, g, &inc); + info("init f %5.3e |g| %5.3e\n", f, gnorm); + + if (gnorm <= eps*gnorm0) + search = 0; + + while (iter <= max_iter && search) + { + fun_obj->get_diag_preconditioner(M); + for(i=0; ilinesearch_and_update(w, s, &f, g, init_step_size); + + if (step_size == 0) + { + info("WARNING: line search fails\n"); + break; + } + + fun_obj->grad(w, g); + gnorm = dnrm2_(&n, g, &inc); + + info("iter %2d f %5.3e |g| %5.3e CG %3d step_size %4.2e \n", iter, f, gnorm, cg_iter, step_size); + + if (gnorm <= eps*gnorm0) + break; + if (f < -1.0e+32) + { + info("WARNING: f < -1.0e+32\n"); + break; + } + actred = fold - f; + if (fabs(actred) <= 1.0e-12*fabs(f)) + { + info("WARNING: actred too small\n"); + break; + } + + iter++; + } + + if(iter >= max_iter) + info("\nWARNING: reaching max number of Newton iterations\n"); + + delete[] g; + delete[] r; + delete[] s; + delete[] M; +} + +int NEWTON::pcg(double *g, double *M, double *s, double *r) +{ + int i, inc = 1; + int n = fun_obj->get_nr_variable(); + double one = 1; + double *d = new double[n]; + double *Hd = new double[n]; + double zTr, znewTrnew, alpha, beta, cgtol, dHd; + double *z = new double[n]; + double Q = 0, newQ, Qdiff; + + for (i=0; iHv(d, Hd); + dHd = ddot_(&n, d, &inc, Hd, &inc); + // avoid 0/0 in getting alpha + if (dHd <= 1.0e-16) + break; + + alpha = zTr/dHd; + daxpy_(&n, &alpha, d, &inc, s, &inc); + alpha = -alpha; + daxpy_(&n, &alpha, Hd, &inc, r, &inc); + + // Using quadratic approximation as CG stopping criterion + newQ = -0.5*(ddot_(&n, s, &inc, r, &inc) - ddot_(&n, s, &inc, g, &inc)); + Qdiff = newQ - Q; + if (newQ <= 0 && Qdiff <= 0) + { + if (cg_iter * Qdiff >= cgtol * newQ) + break; + } + else + { + info("WARNING: quadratic approximation > 0 or increasing in CG\n"); + break; + } + Q = newQ; + + for (i=0; i #include "linear.h" +int print_null(const char *s,...) {return 0;} + +static int (*info)(const char *fmt,...) 
= &printf; + struct feature_node *x; int max_nr_attr = 64; @@ -23,7 +27,7 @@ static int max_line_len; static char* readline(FILE *input) { int len; - + if(fgets(line,max_line_len,input) == NULL) return NULL; @@ -38,10 +42,12 @@ static char* readline(FILE *input) return line; } -void do_predict(FILE *input, FILE *output, struct model* model_) +void do_predict(FILE *input, FILE *output) { int correct = 0; int total = 0; + double error = 0; + double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0; int nr_class=get_nr_class(model_); double *prob_estimates=NULL; @@ -65,7 +71,7 @@ void do_predict(FILE *input, FILE *output, struct model* model_) labels=(int *) malloc(nr_class*sizeof(int)); get_labels(model_,labels); prob_estimates = (double *) malloc(nr_class*sizeof(double)); - fprintf(output,"labels"); + fprintf(output,"labels"); for(j=0;j=max_nr_attr-2) // need one more for index = -1 + if(i>=max_nr_attr-2) // need one more for index = -1 { max_nr_attr *= 2; x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node)); @@ -131,7 +137,7 @@ void do_predict(FILE *input, FILE *output, struct model* model_) { int j; predict_label = predict_probability(model_,x,prob_estimates); - fprintf(output,"%d",predict_label); + fprintf(output,"%g",predict_label); for(j=0;jnr_class;j++) fprintf(output," %g",prob_estimates[j]); fprintf(output,"\n"); @@ -139,14 +145,29 @@ void do_predict(FILE *input, FILE *output, struct model* model_) else { predict_label = predict(model_,x); - fprintf(output,"%d\n",predict_label); + fprintf(output,"%.17g\n",predict_label); } if(predict_label == target_label) ++correct; + error += (predict_label-target_label)*(predict_label-target_label); + sump += predict_label; + sumt += target_label; + sumpp += predict_label*predict_label; + sumtt += target_label*target_label; + sumpt += predict_label*target_label; ++total; } - printf("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total); + if(check_regression_model(model_)) + { + info("Mean squared error = %g (regression)\n",error/total); + info("Squared correlation coefficient = %g (regression)\n", + ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/ + ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt)) + ); + } + else + info("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total); if(flag_predict_probability) free(prob_estimates); } @@ -156,7 +177,8 @@ void exit_with_help() printf( "Usage: predict [options] test_file model_file output_file\n" "options:\n" - "-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0)\n" + "-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only\n" + "-q : quiet mode (no outputs)\n" ); exit(1); } @@ -176,7 +198,10 @@ int main(int argc, char **argv) case 'b': flag_predict_probability = atoi(argv[i]); break; - + case 'q': + info = &print_null; + i--; + break; default: fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]); exit_with_help(); @@ -207,7 +232,7 @@ int main(int argc, char **argv) } x = (struct feature_node *) malloc(max_nr_attr*sizeof(struct feature_node)); - do_predict(input, output, model_); + do_predict(input, output); free_and_destroy_model(&model_); free(line); free(x); diff --git a/liblinear/train.c b/liblinear/train.c index 4920313a0..7923a8d31 100644 --- a/liblinear/train.c +++ b/liblinear/train.c @@ -16,28 +16,45 @@ void exit_with_help() "Usage: train [options] training_set_file [model_file]\n" "options:\n" "-s type : set type of solver (default 
1)\n" - " 0 -- L2-regularized logistic regression (primal)\n" - " 1 -- L2-regularized L2-loss support vector classification (dual)\n" - " 2 -- L2-regularized L2-loss support vector classification (primal)\n" - " 3 -- L2-regularized L1-loss support vector classification (dual)\n" - " 4 -- multi-class support vector classification by Crammer and Singer\n" - " 5 -- L1-regularized L2-loss support vector classification\n" - " 6 -- L1-regularized logistic regression\n" - " 7 -- L2-regularized logistic regression (dual)\n" + " for multi-class classification\n" + " 0 -- L2-regularized logistic regression (primal)\n" + " 1 -- L2-regularized L2-loss support vector classification (dual)\n" + " 2 -- L2-regularized L2-loss support vector classification (primal)\n" + " 3 -- L2-regularized L1-loss support vector classification (dual)\n" + " 4 -- support vector classification by Crammer and Singer\n" + " 5 -- L1-regularized L2-loss support vector classification\n" + " 6 -- L1-regularized logistic regression\n" + " 7 -- L2-regularized logistic regression (dual)\n" + " for regression\n" + " 11 -- L2-regularized L2-loss support vector regression (primal)\n" + " 12 -- L2-regularized L2-loss support vector regression (dual)\n" + " 13 -- L2-regularized L1-loss support vector regression (dual)\n" + " for outlier detection\n" + " 21 -- one-class support vector machine (dual)\n" "-c cost : set the parameter C (default 1)\n" + "-p epsilon : set the epsilon in loss function of SVR (default 0.1)\n" + "-n nu : set the parameter nu of one-class SVM (default 0.5)\n" "-e epsilon : set tolerance of termination criterion\n" - " -s 0 and 2\n" - " |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n" - " where f is the primal function and pos/neg are # of\n" - " positive/negative data (default 0.01)\n" - " -s 1, 3, 4 and 7\n" - " Dual maximal violation <= eps; similar to libsvm (default 0.1)\n" - " -s 5 and 6\n" - " |f'(w)|_1 <= eps*min(pos,neg)/l*|f'(w0)|_1,\n" - " where f is the primal function (default 0.01)\n" + " -s 0 and 2\n" + " |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n" + " where f is the primal function and pos/neg are # of\n" + " positive/negative data (default 0.01)\n" + " -s 11\n" + " |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.0001)\n" + " -s 1, 3, 4, 7, and 21\n" + " Dual maximal violation <= eps; similar to libsvm (default 0.1 except 0.01 for -s 21)\n" + " -s 5 and 6\n" + " |f'(w)|_1 <= eps*min(pos,neg)/l*|f'(w0)|_1,\n" + " where f is the primal function (default 0.01)\n" + " -s 12 and 13\n" + " |f'(alpha)|_1 <= eps |f'(alpha0)|,\n" + " where f is the dual function (default 0.1)\n" "-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n" + "-R : not regularize the bias; must with -B 1 to have the bias; DON'T use this unless you know what it is\n" + " (for -s 0, 2, 5, 6, 11)\n" "-wi weight: weights adjust the parameter C of different classes (see README for details)\n" "-v n: n-fold cross validation mode\n" + "-C : find parameters (C for -s 0, 2 and C, p for -s 11)\n" "-q : quiet mode (no outputs)\n" ); exit(1); @@ -55,7 +72,7 @@ static int max_line_len; static char* readline(FILE *input) { int len; - + if(fgets(line,max_line_len,input) == NULL) return NULL; @@ -73,12 +90,17 @@ static char* readline(FILE *input) void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name); void read_problem(const char *filename); void do_cross_validation(); +void do_find_parameters(); struct feature_node *x_space; struct parameter param; struct problem prob; 
struct model* model_; int flag_cross_validation; +int flag_find_parameters; +int flag_C_specified; +int flag_p_specified; +int flag_solver_specified; int nr_fold; double bias; @@ -94,11 +116,15 @@ int main(int argc, char **argv) if(error_msg) { - fprintf(stderr,"Error: %s\n",error_msg); + fprintf(stderr,"ERROR: %s\n",error_msg); exit(1); } - if(flag_cross_validation) + if (flag_find_parameters) + { + do_find_parameters(); + } + else if(flag_cross_validation) { do_cross_validation(); } @@ -121,18 +147,63 @@ int main(int argc, char **argv) return 0; } +void do_find_parameters() +{ + double start_C, start_p, best_C, best_p, best_score; + if (flag_C_specified) + start_C = param.C; + else + start_C = -1.0; + if (flag_p_specified) + start_p = param.p; + else + start_p = -1.0; + + printf("Doing parameter search with %d-fold cross validation.\n", nr_fold); + find_parameters(&prob, ¶m, nr_fold, start_C, start_p, &best_C, &best_p, &best_score); + if(param.solver_type == L2R_LR || param.solver_type == L2R_L2LOSS_SVC) + printf("Best C = %g CV accuracy = %g%%\n", best_C, 100.0*best_score); + else if(param.solver_type == L2R_L2LOSS_SVR) + printf("Best C = %g Best p = %g CV MSE = %g\n", best_C, best_p, best_score); +} + void do_cross_validation() { int i; int total_correct = 0; - int *target = Malloc(int, prob.l); + double total_error = 0; + double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0; + double *target = Malloc(double, prob.l); cross_validation(&prob,¶m,nr_fold,target); - - for(i=0;iindex = prob.n; + (prob.x[i]-2)->index = prob.n; x_space[j-2].index = prob.n; } else diff --git a/liblinear/tron.cpp b/liblinear/tron.cpp deleted file mode 100644 index bff50787e..000000000 --- a/liblinear/tron.cpp +++ /dev/null @@ -1,235 +0,0 @@ -#include -#include -#include -#include -#include "tron.h" - -#ifndef min -template static inline T min(T x,T y) { return (x static inline T max(T x,T y) { return (x>y)?x:y; } -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -extern double dnrm2_(int *, double *, int *); -extern double ddot_(int *, double *, int *, double *, int *); -extern int daxpy_(int *, double *, double *, int *, double *, int *); -extern int dscal_(int *, double *, double *, int *); - -#ifdef __cplusplus -} -#endif - -static void default_print(const char *buf) -{ - fputs(buf,stdout); - fflush(stdout); -} - -void TRON::info(const char *fmt,...) -{ - char buf[BUFSIZ]; - va_list ap; - va_start(ap,fmt); - vsprintf(buf,fmt,ap); - va_end(ap); - (*tron_print_string)(buf); -} - -TRON::TRON(const function *fun_obj, double eps, int max_iter) -{ - this->fun_obj=const_cast(fun_obj); - this->eps=eps; - this->max_iter=max_iter; - tron_print_string = default_print; -} - -TRON::~TRON() -{ -} - -void TRON::tron(double *w) -{ - // Parameters for updating the iterates. - double eta0 = 1e-4, eta1 = 0.25, eta2 = 0.75; - - // Parameters for updating the trust region size delta. 
- double sigma1 = 0.25, sigma2 = 0.5, sigma3 = 4; - - int n = fun_obj->get_nr_variable(); - int i, cg_iter; - double delta, snorm, one=1.0; - double alpha, f, fnew, prered, actred, gs; - int search = 1, iter = 1, inc = 1; - double *s = new double[n]; - double *r = new double[n]; - double *w_new = new double[n]; - double *g = new double[n]; - - for (i=0; ifun(w); - fun_obj->grad(w, g); - delta = dnrm2_(&n, g, &inc); - double gnorm1 = delta; - double gnorm = gnorm1; - - if (gnorm <= eps*gnorm1) - search = 0; - - iter = 1; - - while (iter <= max_iter && search) - { - cg_iter = trcg(delta, g, s, r); - - memcpy(w_new, w, sizeof(double)*n); - daxpy_(&n, &one, s, &inc, w_new, &inc); - - gs = ddot_(&n, g, &inc, s, &inc); - prered = -0.5*(gs-ddot_(&n, s, &inc, r, &inc)); - fnew = fun_obj->fun(w_new); - - // Compute the actual reduction. - actred = f - fnew; - - // On the first iteration, adjust the initial step bound. - snorm = dnrm2_(&n, s, &inc); - if (iter == 1) - delta = min(delta, snorm); - - // Compute prediction alpha*snorm of the step. - if (fnew - f - gs <= 0) - alpha = sigma3; - else - alpha = max(sigma1, -0.5*(gs/(fnew - f - gs))); - - // Update the trust region bound according to the ratio of actual to predicted reduction. - if (actred < eta0*prered) - delta = min(max(alpha, sigma1)*snorm, sigma2*delta); - else if (actred < eta1*prered) - delta = max(sigma1*delta, min(alpha*snorm, sigma2*delta)); - else if (actred < eta2*prered) - delta = max(sigma1*delta, min(alpha*snorm, sigma3*delta)); - else - delta = max(delta, min(alpha*snorm, sigma3*delta)); - - info("iter %2d act %5.3e pre %5.3e delta %5.3e f %5.3e |g| %5.3e CG %3d\n", iter, actred, prered, delta, f, gnorm, cg_iter); - - if (actred > eta0*prered) - { - iter++; - memcpy(w, w_new, sizeof(double)*n); - f = fnew; - fun_obj->grad(w, g); - - gnorm = dnrm2_(&n, g, &inc); - if (gnorm <= eps*gnorm1) - break; - } - if (f < -1.0e+32) - { - info("warning: f < -1.0e+32\n"); - break; - } - if (fabs(actred) <= 0 && prered <= 0) - { - info("warning: actred and prered <= 0\n"); - break; - } - if (fabs(actred) <= 1.0e-12*fabs(f) && - fabs(prered) <= 1.0e-12*fabs(f)) - { - info("warning: actred and prered too small\n"); - break; - } - } - - delete[] g; - delete[] r; - delete[] w_new; - delete[] s; -} - -int TRON::trcg(double delta, double *g, double *s, double *r) -{ - int i, inc = 1; - int n = fun_obj->get_nr_variable(); - double one = 1; - double *d = new double[n]; - double *Hd = new double[n]; - double rTr, rnewTrnew, alpha, beta, cgtol; - - for (i=0; iHv(d, Hd); - - alpha = rTr/ddot_(&n, d, &inc, Hd, &inc); - daxpy_(&n, &alpha, d, &inc, s, &inc); - if (dnrm2_(&n, s, &inc) > delta) - { - info("cg reaches trust region boundary\n"); - alpha = -alpha; - daxpy_(&n, &alpha, d, &inc, s, &inc); - - double std = ddot_(&n, s, &inc, d, &inc); - double sts = ddot_(&n, s, &inc, s, &inc); - double dtd = ddot_(&n, d, &inc, d, &inc); - double dsq = delta*delta; - double rad = sqrt(std*std + dtd*(dsq-sts)); - if (std >= 0) - alpha = (dsq - sts)/(std + rad); - else - alpha = (rad - std)/dtd; - daxpy_(&n, &alpha, d, &inc, s, &inc); - alpha = -alpha; - daxpy_(&n, &alpha, Hd, &inc, r, &inc); - break; - } - alpha = -alpha; - daxpy_(&n, &alpha, Hd, &inc, r, &inc); - rnewTrnew = ddot_(&n, r, &inc, r, &inc); - beta = rnewTrnew/rTr; - dscal_(&n, &beta, d, &inc); - daxpy_(&n, &one, r, &inc, d, &inc); - rTr = rnewTrnew; - } - - delete[] d; - delete[] Hd; - - return(cg_iter); -} - -double TRON::norm_inf(int n, double *x) -{ - double dmax = fabs(x[0]); - for (int i=1; i= 
dmax) - dmax = fabs(x[i]); - return(dmax); -} - -void TRON::set_print_string(void (*print_string) (const char *buf)) -{ - tron_print_string = print_string; -} diff --git a/liblinear/tron.h b/liblinear/tron.h deleted file mode 100644 index 3045c2e83..000000000 --- a/liblinear/tron.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef _TRON_H -#define _TRON_H - -class function -{ -public: - virtual double fun(double *w) = 0 ; - virtual void grad(double *w, double *g) = 0 ; - virtual void Hv(double *s, double *Hs) = 0 ; - - virtual int get_nr_variable(void) = 0 ; - virtual ~function(void){} -}; - -class TRON -{ -public: - TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000); - ~TRON(); - - void tron(double *w); - void set_print_string(void (*i_print) (const char *buf)); - -private: - int trcg(double delta, double *g, double *s, double *r); - double norm_inf(int n, double *x); - - double eps; - int max_iter; - function *fun_obj; - void info(const char *fmt,...); - void (*tron_print_string)(const char *buf); -}; -#endif
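With tron.cpp/tron.h removed in favor of NEWTON, the other user-visible additions in this upgrade are the parameter-search entry point find_parameters() and the decision-function accessors get_decfun_coef()/get_decfun_bias()/get_decfun_rho() declared in linear.h above. A minimal sketch of how downstream code might call the first two of these; the helper name, fold count, solver choice, and the assumption that prob was already loaded (for example by the same read_problem() logic train.c uses) are illustrative, not taken from the sources:

    #include <stdio.h>
    #include "linear.h"

    /* hypothetical helper: warm-started C search, then inspect the trained weights */
    void tune_and_inspect(const struct problem *prob)
    {
        struct parameter param = {0};
        param.solver_type = L2R_LR;   /* -s 0: only C is searched; -s 11 would also search p */
        param.eps = 0.01;
        param.C = 1;
        param.regularize_bias = 1;

        double best_C, best_p, best_score;
        /* start_C/start_p <= 0 let liblinear pick its own starting points,
           mirroring what train.c -C does when -c/-p are not given */
        find_parameters(prob, &param, 5, -1.0, -1.0, &best_C, &best_p, &best_score);
        printf("best C = %g, CV accuracy = %g%%\n", best_C, 100.0 * best_score);

        param.C = best_C;
        struct model *m = train(prob, &param);

        /* feat_idx is 1-based, label_idx 0-based; label_idx is ignored
           for regression and one-class models */
        int j;
        for (j = 1; j <= get_nr_feature(m); j++)
            printf("w[%d] = %g\n", j, get_decfun_coef(m, j, 0));
        printf("bias term = %g\n", get_decfun_bias(m, 0));

        free_and_destroy_model(&m);
    }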