File manager - Edit - /usr/local/lib/python3.9/dist-packages/sklearn/feature_selection/tests/test_chi2.py
Back
""" Tests for chi2, currently the only feature selection function designed specifically to work with sparse matrices. """ import warnings import numpy as np import pytest import scipy.stats from sklearn.feature_selection import SelectKBest, chi2 from sklearn.feature_selection._univariate_selection import _chisquare from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal from sklearn.utils.fixes import COO_CONTAINERS, CSR_CONTAINERS # Feature 0 is highly informative for class 1; # feature 1 is the same everywhere; # feature 2 is a bit informative for class 2. X = [[2, 1, 2], [9, 1, 1], [6, 1, 2], [0, 1, 2]] y = [0, 1, 2, 2] def mkchi2(k): """Make k-best chi2 selector""" return SelectKBest(chi2, k=k) @pytest.mark.parametrize("csr_container", CSR_CONTAINERS) def test_chi2(csr_container): # Test Chi2 feature extraction chi2 = mkchi2(k=1).fit(X, y) chi2 = mkchi2(k=1).fit(X, y) assert_array_equal(chi2.get_support(indices=True), [0]) assert_array_equal(chi2.transform(X), np.array(X)[:, [0]]) chi2 = mkchi2(k=2).fit(X, y) assert_array_equal(sorted(chi2.get_support(indices=True)), [0, 2]) Xsp = csr_container(X, dtype=np.float64) chi2 = mkchi2(k=2).fit(Xsp, y) assert_array_equal(sorted(chi2.get_support(indices=True)), [0, 2]) Xtrans = chi2.transform(Xsp) assert_array_equal(Xtrans.shape, [Xsp.shape[0], 2]) # == doesn't work on scipy.sparse matrices Xtrans = Xtrans.toarray() Xtrans2 = mkchi2(k=2).fit_transform(Xsp, y).toarray() assert_array_almost_equal(Xtrans, Xtrans2) @pytest.mark.parametrize("coo_container", COO_CONTAINERS) def test_chi2_coo(coo_container): # Check that chi2 works with a COO matrix # (as returned by CountVectorizer, DictVectorizer) Xcoo = coo_container(X) mkchi2(k=2).fit_transform(Xcoo, y) # if we got here without an exception, we're safe @pytest.mark.parametrize("csr_container", CSR_CONTAINERS) def test_chi2_negative(csr_container): # Check for proper error on negative numbers in the input X. X, y = [[0, 1], [-1e-20, 1]], [0, 1] for X in (X, np.array(X), csr_container(X)): with pytest.raises(ValueError): chi2(X, y) def test_chi2_unused_feature(): # Unused feature should evaluate to NaN # and should issue no runtime warning with warnings.catch_warnings(record=True) as warned: warnings.simplefilter("always") chi, p = chi2([[1, 0], [0, 0]], [1, 0]) for w in warned: if "divide by zero" in repr(w): raise AssertionError("Found unexpected warning %s" % w) assert_array_equal(chi, [1, np.nan]) assert_array_equal(p[1], np.nan) def test_chisquare(): # Test replacement for scipy.stats.chisquare against the original. obs = np.array([[2.0, 2.0], [1.0, 1.0]]) exp = np.array([[1.5, 1.5], [1.5, 1.5]]) # call SciPy first because our version overwrites obs chi_scp, p_scp = scipy.stats.chisquare(obs, exp) chi_our, p_our = _chisquare(obs, exp) assert_array_almost_equal(chi_scp, chi_our) assert_array_almost_equal(p_scp, p_our)
| ver. 1.4 |
Github
|
.
| PHP 7.4.33 | Generation time: 0.52 |
proxy
|
phpinfo
|
Settings