Update liblinear to 2.50

2026-01-29 17:49:08 +00:00 · 2026-01-28 22:23:52 +00:00
parent c1d16550dc
commit 59bb6e4e4e
5 changed files with 66 additions and 14 deletions
--- a/liblinear/Makefile
+++ b/liblinear/Makefile
@@ -3,9 +3,7 @@ CC ?= gcc
 CFLAGS = -Wall -Wconversion -O3 -fPIC
 LIBS = blas/blas.a
 #LIBS = -lblas
-SHVER = 5
-AR = ar
-RANLIB = ranlib
+SHVER = 6
 OS = $(shell uname)
 ifeq ($(OS),Darwin)
 	SHARED_LIB_FLAG = -dynamiclib -Wl,-install_name,liblinear.so.$(SHVER)
@@ -18,10 +16,6 @@ all: train predict
 lib: linear.o newton.o blas/blas.a
 	$(CXX) $(SHARED_LIB_FLAG) linear.o newton.o blas/blas.a -o liblinear.so.$(SHVER)

-liblinear.a: linear.o newton.o blas/blas.a
-	$(AR) rcv liblinear.a linear.o newton.o blas/*.o
-	$(RANLIB) liblinear.a
-
 train: newton.o linear.o train.c blas/blas.a
 	$(CXX) $(CFLAGS) -o train train.c newton.o linear.o $(LIBS)

--- a/liblinear/README
+++ b/liblinear/README
@@ -81,8 +81,7 @@ On Unix systems, type `make' to build the `train', `predict',
 and `svm-scale' programs. Run them without arguments to show the usages.

 On other systems, consult `Makefile' to build them (e.g., see
-'Building Windows binaries' in this file) or use the pre-built
-binaries (Windows binaries are in the directory `windows').
+'Building Windows binaries' in this file).

 This software uses some level-1 BLAS subroutines. The needed functions are
 included in this package.  If a BLAS library is available on your
@@ -398,6 +397,8 @@ in linear.h, so you can check the version number.
                double* weight;
                double p;
                double *init_sol;
+                int regularize_bias;
+                bool w_recalc;  /* for -s 1, 3; may be extended to -s 12, 13, 21 */
        };

    solver_type can be one of L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL, ONECLASS_SVM.
@@ -438,6 +439,16 @@ in linear.h, so you can check the version number.
    solvers). See the explanation of the vector w in the model
    structure.

+    regularize_bias is the flag for bias regularization. By default it is set
+    to 1. If you don't want to regularize the bias, set it to 0 and set
+    the bias in the problem structure to 1. (DON'T use it unless
+    you know what it is.)
+
+    w_recalc is the flag for recalculating w after optimization
+    with a dual-based solver. This may further reduce the weight density
+    when the data is sparse. The default value is set as false for time
+    efficiency. Currently it only takes effect in -s 1 and 3.
+
    *NOTE* To avoid wrong parameters, check_parameter() should be
    called before train().

@@ -643,11 +654,32 @@ in linear.h, so you can check the version number.
        set_print_string_function(NULL);
    for default printing to stdout.

+    Please note that this function is not thread-safe. When multiple threads load or
+    use the same dynamic library (for example, liblinear.so.6), they actually share the
+    same memory space of the dynamic library, which results in all threads modifying
+    the same static function pointer, liblinear_print_string, in linear.cpp when they
+    call this function.
+
+    For example, suppose we have threads A and B. They call this function sequentially
+    and pass their own thread-local print_func into it. After that, they both call (*liblinear_print_string)(str)
+    once. When the last thread finishes setting it (say B), liblinear_print_string
+    is set to B.print_func. Now, if thread A wants to access liblinear_print_string,
+    it is actually accessing B.print_func rather than A.print_func, which is incorrect
+    since we expect to use the functionality of A.print_func.
+
+    Even if A.print_func and B.print_func have identical functionality, it is still risky.
+    Suppose liblinear_print_string is now set to B.print_func, and B deletes B.print_func
+    after finishing its work. Later, thread A calls liblinear_print_string, but the function it
+    points to, which is B.print_func, has already been deleted. This invalid memory access
+    will crash the program. To mitigate this issue, in this example, you should ensure that
+    A.print_func and B.print_func remain valid after threads finish their work. For example,
+    in Python, you can assign them as global variables.
+
 Building Windows Binaries
 =========================

-Windows binaries are available in the directory `windows'. To re-build
-them via Visual C++, use the following steps:
+Starting from version 2.48, we no longer provide pre-built Windows binaries.
+To build them via Visual C++, use the following steps:

 1. Open a dos command box and change to liblinear directory. If
 environment variables of VC++ have not been set, type
--- a/liblinear/linear.cpp
+++ b/liblinear/linear.cpp
@@ -1064,6 +1064,21 @@ static int solve_l2r_l1l2_svc(const problem *prob, const parameter *param, doubl
 	info("Objective value = %lf\n",v/2);
 	info("nSV = %d\n",nSV);

+	// Reconstruct w from the primal-dual relationship w=sum(\alpha_i y_i x_i)
+	// This may reduce the weight density. Some zero weights become non-zeros
+	// due to the numerical update w <- w + (alpha[i] - alpha_old) y_i x_i.
+	if (param->w_recalc)
+	{
+		for(i=0; i<w_size; i++)
+			w[i] = 0;
+		for(i=0; i<l; i++)
+		{
+			feature_node * const xi = prob->x[i];
+			if(alpha[i] > 0)
+				sparse_operator::axpy(y[i]*alpha[i], xi, w);
+		}
+	}
+
 	delete [] QD;
 	delete [] alpha;
 	delete [] y;
@@ -2194,11 +2209,14 @@ static int partition(feature_node *nodes, int low, int high)
 	return index;
 }

-// rearrange nodes so that nodes[:k] contains nodes with the k smallest values.
+// rearrange nodes so that
+// nodes[i] <= nodes[k] for all i < k
+// nodes[k] <= nodes[j] for all j > k
+// low and high are the bounds of the index range during the rearranging process
 static void quick_select_min_k(feature_node *nodes, int low, int high, int k)
 {
 	int pivot;
-	if(low == high)
+	if(low == high || high < k)
 		return;
 	pivot = partition(nodes, low, high);
 	if(pivot == k)
@@ -3718,6 +3736,11 @@ const char *check_parameter(const problem *prob, const parameter *param)
 		&& param->solver_type != L2R_L2LOSS_SVR)
 		return "Initial-solution specification supported only for solvers L2R_LR, L2R_L2LOSS_SVC, and L2R_L2LOSS_SVR";

+	if(param->w_recalc == true
+		&& param->solver_type != L2R_L2LOSS_SVC_DUAL
+		&& param->solver_type != L2R_L1LOSS_SVC_DUAL)
+		return "Recalculating w in the end is only for dual solvers for L2-regularized L1/L2-loss SVM";
+
 	return NULL;
 }

--- a/liblinear/linear.h
+++ b/liblinear/linear.h
@@ -1,7 +1,8 @@
+#include <stdbool.h>
 #ifndef _LIBLINEAR_H
 #define _LIBLINEAR_H

-#define LIBLINEAR_VERSION 247
+#define LIBLINEAR_VERSION 250

 #ifdef __cplusplus
 extern "C" {
@@ -39,6 +40,7 @@ struct parameter
 	double nu;
 	double *init_sol;
 	int regularize_bias;
+	bool w_recalc;		/* for -s 1, 3; may be extended to -s 12, 13, 21 */
 };

 struct model
--- a/liblinear/train.c
+++ b/liblinear/train.c
@@ -224,6 +224,7 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
 	param.weight_label = NULL;
 	param.weight = NULL;
 	param.init_sol = NULL;
+	param.w_recalc = false;
 	flag_cross_validation = 0;
 	flag_C_specified = 0;
 	flag_p_specified = 0;