Skip to content

Commit 8fa9eef

Browse files
committed
Use poly to calculate poly contrast. Add ability to use standardize in addition to QR decomposition. Added all numpy polynomial types.
remove poly
1 parent c46a5b1 commit 8fa9eef

File tree

6 files changed

+125
-53
lines changed

6 files changed

+125
-53
lines changed

doc/API-reference.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,8 +198,8 @@ Spline regression
198198
.. autofunction:: cc
199199
.. autofunction:: te
200200

201-
Orthogonal Polynomial
202-
---------------------
201+
Polynomial
202+
----------
203203

204204
.. autofunction:: poly
205205

patsy/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,8 @@ def _reexport(mod):
113113
import patsy.mgcv_cubic_splines
114114
_reexport(patsy.mgcv_cubic_splines)
115115

116-
import patsy.poly
117-
_reexport(patsy.poly)
116+
import patsy.polynomials
117+
_reexport(patsy.polynomials)
118118

119119
# XX FIXME: we aren't exporting any of the explicit parsing interface
120120
# yet. Need to figure out how to do that.

patsy/contrasts.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from patsy.util import (repr_pretty_delegate, repr_pretty_impl,
1818
safe_issubdtype,
1919
no_pickling, assert_no_pickling)
20+
from patsy.polynomials import Poly as Polynomial
2021

2122
class ContrastMatrix(object):
2223
"""A simple container for a matrix used for coding categorical factors.
@@ -263,11 +264,9 @@ def _code_either(self, intercept, levels):
263264
# quadratic, etc., functions of the raw scores, and then use 'qr' to
264265
# orthogonalize each column against those to its left.
265266
scores -= scores.mean()
266-
raw_poly = scores.reshape((-1, 1)) ** np.arange(n).reshape((1, -1))
267-
q, r = np.linalg.qr(raw_poly)
268-
q *= np.sign(np.diag(r))
269-
q /= np.sqrt(np.sum(q ** 2, axis=1))
270-
# The constant term is always all 1's -- we don't normalize it.
267+
raw_poly = Polynomial.vander(scores, n - 1, 'poly')
268+
alpha, norm, beta = Polynomial.gen_qr(raw_poly, n - 1)
269+
q = Polynomial.apply_qr(raw_poly, n - 1, alpha, norm, beta)
271270
q[:, 0] = 1
272271
names = [".Constant", ".Linear", ".Quadratic", ".Cubic"]
273272
names += ["^%s" % (i,) for i in range(4, n)]

patsy/poly.py renamed to patsy/polynomials.py

Lines changed: 115 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import pandas
1717

1818
class Poly(object):
19-
"""poly(x, degree=1, raw=False)
19+
"""poly(x, degree=3, polytype='poly', raw=False, scaler=None)
2020
2121
Generates a polynomial transformation of x of the given degree (orthogonal by default).
2222
Generic usage is something along the lines of::
@@ -26,19 +26,29 @@ class Poly(object):
2626
to fit ``y`` as a function of ``x``, with a 4th degree polynomial.
2727
2828
:arg degree: The number of degrees for the polynomial expansion.
29+
:arg polytype: Either poly (the default), cheb, legendre, laguerre, hermite, or
30+
hermite_e.
2931
:arg raw: When raw is False (the default), will return orthogonal
3032
polynomials.
33+
:arg scaler: Choice of 'qr' (default when raw=False) for QR-
34+
decomposition or 'standardize'.
3135
3236
.. versionadded:: 0.4.1
3337
"""
3438
def __init__(self):
3539
self._tmp = {}
36-
self._degree = None
37-
self._raw = None
3840

39-
def memorize_chunk(self, x, degree=3, raw=False):
41+
def memorize_chunk(self, x, degree=3, polytype='poly', raw=False,
42+
scaler=None):
43+
if not raw and (scaler is None):
44+
scaler = 'qr'
45+
if scaler not in ('qr', 'standardize', None):
46+
raise ValueError('input to \'scaler\' %s is not a valid '
47+
'scaling technique' % scaler)
4048
args = {"degree": degree,
41-
"raw": raw
49+
"raw": raw,
50+
"scaler": scaler,
51+
'polytype': polytype
4252
}
4353
self._tmp["args"] = args
4454
# XX: check whether we need x values before saving them
@@ -63,35 +73,27 @@ def memorize_finish(self):
6373
% (args["degree"],))
6474
if int(args["degree"]) != args["degree"]:
6575
raise ValueError("degree must be an integer (not %r)"
66-
% (self._degree,))
76+
% (args['degree'],))
6777

6878
# These are guaranteed to all be 1d vectors by the code above
6979
scores = np.concatenate(tmp["xs"])
70-
scores_mean = scores.mean()
71-
# scores -= scores_mean
72-
self.scores_mean = scores_mean
80+
7381
n = args['degree']
7482
self.degree = n
75-
raw_poly = scores.reshape((-1, 1)) ** np.arange(n + 1).reshape((1, -1))
76-
raw = args['raw']
77-
self.raw = raw
78-
if not raw:
79-
q, r = np.linalg.qr(raw_poly)
80-
# Q is now orthognoal of degree n. To match what R is doing, we
81-
# need to use the three-term recurrence technique to calculate
82-
# new alpha, beta, and norm.
83-
84-
self.alpha = (np.sum(scores.reshape((-1, 1)) * q[:, :n] ** 2,
85-
axis=0) /
86-
np.sum(q[:, :n] ** 2, axis=0))
87-
88-
# For reasons I don't understand, the norms R uses are based off
89-
# of the diagonal of the r upper triangular matrix.
90-
91-
self.norm = np.linalg.norm(q * np.diag(r), axis=0)
92-
self.beta = (self.norm[1:] / self.norm[:n]) ** 2
93-
94-
def transform(self, x, degree=3, raw=False):
83+
self.scaler = args['scaler']
84+
self.raw = args['raw']
85+
self.polytype = args['polytype']
86+
87+
if self.scaler is not None:
88+
raw_poly = self.vander(scores, n, self.polytype)
89+
90+
if self.scaler == 'qr':
91+
self.alpha, self.norm, self.beta = self.gen_qr(raw_poly, n)
92+
93+
if self.scaler == 'standardize':
94+
self.mean, self.var = self.gen_standardize(raw_poly)
95+
96+
def transform(self, x, degree=3, polytype='poly', raw=False, scaler=None):
9597
if have_pandas:
9698
if isinstance(x, (pandas.Series, pandas.DataFrame)):
9799
to_pandas = True
@@ -102,28 +104,75 @@ def transform(self, x, degree=3, raw=False):
102104
to_pandas = False
103105
x = np.array(x, ndmin=1).flatten()
104106

105-
if self.raw:
106-
n = self.degree
107-
p = x.reshape((-1, 1)) ** np.arange(n + 1).reshape((1, -1))
108-
else:
109-
# This is where the three-term recurrance technique is unwound.
107+
n = self.degree
108+
p = self.vander(x, n, self.polytype)
110109

111-
p = np.empty((x.shape[0], self.degree + 1))
112-
p[:, 0] = 1
110+
if self.scaler == 'qr':
111+
p = self.apply_qr(p, n, self.alpha, self.norm, self.beta)
113112

114-
for i in np.arange(self.degree):
115-
p[:, i + 1] = (x - self.alpha[i]) * p[:, i]
116-
if i > 0:
117-
p[:, i + 1] = (p[:, i + 1] -
118-
(self.beta[i - 1] * p[:, i - 1]))
119-
p /= self.norm
113+
if self.scaler == 'standardize':
114+
p = self.apply_standardize(p, self.mean, self.var)
120115

121116
p = p[:, 1:]
122117
if to_pandas:
123118
p = pandas.DataFrame(p)
124119
p.index = idx
125120
return p
126121

122+
@staticmethod
123+
def vander(x, n, polytype):
124+
v_func = {'poly': np.polynomial.polynomial.polyvander,
125+
'cheb': np.polynomial.chebyshev.chebvander,
126+
'legendre': np.polynomial.legendre.legvander,
127+
'laguerre': np.polynomial.laguerre.lagvander,
128+
'hermite': np.polynomial.hermite.hermvander,
129+
'hermite_e': np.polynomial.hermite_e.hermevander}
130+
raw_poly = v_func[polytype](x, n)
131+
return raw_poly
132+
133+
@staticmethod
134+
def gen_qr(raw_poly, n):
135+
# Q is now orthogonal of degree n. To match what R is doing, we
136+
# need to use the three-term recurrence technique to calculate
137+
# new alpha, beta, and norm.
138+
x = raw_poly[:, 1]
139+
q, r = np.linalg.qr(raw_poly)
140+
alpha = (np.sum(x.reshape((-1, 1)) * q[:, :n] ** 2, axis=0) /
141+
np.sum(q[:, :n] ** 2, axis=0))
142+
143+
# For reasons I don't understand, the norms R uses are based off
144+
# of the diagonal of the r upper triangular matrix.
145+
146+
norm = np.linalg.norm(q * np.diag(r), axis=0)
147+
beta = (norm[1:] / norm[:n]) ** 2
148+
return alpha, norm, beta
149+
150+
@staticmethod
151+
def gen_standardize(raw_poly):
152+
return raw_poly.mean(axis=0), raw_poly.var(axis=0)
153+
154+
@staticmethod
155+
def apply_qr(x, n, alpha, norm, beta):
156+
# This is where the three-term recurrence is unwound for the QR
157+
# decomposition.
158+
if np.ndim(x) == 2:
159+
x = x[:, 1]
160+
p = np.empty((x.shape[0], n + 1))
161+
p[:, 0] = 1
162+
163+
for i in np.arange(n):
164+
p[:, i + 1] = (x - alpha[i]) * p[:, i]
165+
if i > 0:
166+
p[:, i + 1] = (p[:, i + 1] - (beta[i - 1] * p[:, i - 1]))
167+
p /= norm
168+
return p
169+
170+
@staticmethod
171+
def apply_standardize(x, mean, var):
172+
x[:, 1:] = ((x[:, 1:] - mean[1:]) / (var[1:] ** 0.5))
173+
return x
174+
175+
127176
__getstate__ = no_pickling
128177

129178
poly = stateful_transform(Poly)
@@ -166,6 +215,24 @@ def test_poly_compat():
166215
start_idx = stop_idx + 1
167216
assert tests_ran == R_poly_num_tests
168217

218+
def test_poly_smoke():
219+
# Test that standardized values match.
220+
x = np.arange(27)
221+
vanders = ['poly', 'cheb', 'legendre', 'laguerre', 'hermite', 'hermite_e']
222+
scalers = ['raw', 'qr', 'standardize']
223+
for v in vanders:
224+
p1 = poly(x, polytype=v, scaler='standardize')
225+
p2 = poly(x, polytype=v, raw=True)
226+
p2 = (p2 - p2.mean(axis=0)) / p2.std(axis=0)
227+
np.testing.assert_allclose(p1, p2)
228+
229+
# Don't have tests for all this... so just make sure it works.
230+
for v in vanders:
231+
for s in scalers:
232+
if s == 'raw':
233+
poly(x, raw=True, polytype=v)
234+
else:
235+
poly(x, scaler=s, polytype=v)
169236

170237
def test_poly_errors():
171238
from nose.tools import assert_raises
@@ -177,3 +244,9 @@ def test_poly_errors():
177244
assert_raises(ValueError, poly, x, degree=-1)
178245
assert_raises(ValueError, poly, x, degree=0)
179246
assert_raises(ValueError, poly, x, degree=3.5)
247+
248+
#Invalid Poly Type
249+
assert_raises(KeyError, poly, x, polytype='foo')
250+
251+
#Invalid scaling type
252+
assert_raises(ValueError, poly, x, scaler='bar')

patsy/test_poly_data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# This file auto-generated by tools/get-R-bs-test-vectors.R
1+
# This file auto-generated by tools/get-R-poly-test-vectors.R
22
# Using: R version 3.2.4 Revised (2016-03-16 r70336)
33
import numpy as np
44
R_poly_test_x = np.array([1, 1.5, 2.25, 3.375, 5.0625, 7.59375, 11.390625, 17.0859375, 25.62890625, 38.443359375, 57.6650390625, 86.49755859375, 129.746337890625, 194.6195068359375, 291.92926025390625, 437.89389038085938, 656.84083557128906, 985.26125335693359, 1477.8918800354004, 2216.8378200531006, ])

tools/get-R-poly-test-vectors.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
cat("# This file auto-generated by tools/get-R-bs-test-vectors.R\n")
1+
cat("# This file auto-generated by tools/get-R-poly-test-vectors.R\n")
22
cat(sprintf("# Using: %s\n", R.Version()$version.string))
33
cat("import numpy as np\n")
44

0 commit comments

Comments
 (0)