1616 import pandas
1717
1818class Poly (object ):
19- """poly(x, degree=1, raw=False)
19+ """poly(x, degree=3, polytype='poly', raw=False, scaler=None )
2020
2121 Generates an orthogonal polynomial transformation of x of the given degree.
2222 Generic usage is something along the lines of::
@@ -26,19 +26,29 @@ class Poly(object):
2626 to fit ``y`` as a function of ``x``, with a 4th degree polynomial.
2727
2828 :arg degree: The number of degrees for the polynomial expansion.
29+ :arg polytype: Either poly (the default), cheb, legendre, laguerre, hermite,
30+ or hermite_e.
2931 :arg raw: When raw is False (the default), will return orthogonal
3032 polynomials.
33+ :arg scaler: Either 'qr' (QR decomposition; the default when raw=False)
34+ or 'standardize'. With raw=True the default is no scaling.
3135
3236 .. versionadded:: 0.4.1
3337 """
3438 def __init__ (self ):
3539 self ._tmp = {}
36- self ._degree = None
37- self ._raw = None
3840
39- def memorize_chunk (self , x , degree = 3 , raw = False ):
41+ def memorize_chunk (self , x , degree = 3 , polytype = 'poly' , raw = False ,
42+ scaler = None ):
43+ if not raw and (scaler is None ):
44+ scaler = 'qr'
45+ if scaler not in ('qr' , 'standardize' , None ):
46+ raise ValueError ('input to \' scaler\' %s is not a valid '
47+ 'scaling technique' % scaler )
4048 args = {"degree" : degree ,
41- "raw" : raw
49+ "raw" : raw ,
50+ "scaler" : scaler ,
51+ 'polytype' : polytype
4252 }
4353 self ._tmp ["args" ] = args
4454 # XX: check whether we need x values before saving them
@@ -63,35 +73,27 @@ def memorize_finish(self):
6373 % (args ["degree" ],))
6474 if int (args ["degree" ]) != args ["degree" ]:
6575 raise ValueError ("degree must be an integer (not %r)"
66- % (self . _degree ,))
76+ % (args [ 'degree' ] ,))
6777
6878 # These are guaranteed to all be 1d vectors by the code above
6979 scores = np .concatenate (tmp ["xs" ])
70- scores_mean = scores .mean ()
71- # scores -= scores_mean
72- self .scores_mean = scores_mean
80+
7381 n = args ['degree' ]
7482 self .degree = n
75- raw_poly = scores .reshape ((- 1 , 1 )) ** np .arange (n + 1 ).reshape ((1 , - 1 ))
76- raw = args ['raw' ]
77- self .raw = raw
78- if not raw :
79- q , r = np .linalg .qr (raw_poly )
80- # Q is now orthognoal of degree n. To match what R is doing, we
81- # need to use the three-term recurrence technique to calculate
82- # new alpha, beta, and norm.
83-
84- self .alpha = (np .sum (scores .reshape ((- 1 , 1 )) * q [:, :n ] ** 2 ,
85- axis = 0 ) /
86- np .sum (q [:, :n ] ** 2 , axis = 0 ))
87-
88- # For reasons I don't understand, the norms R uses are based off
89- # of the diagonal of the r upper triangular matrix.
90-
91- self .norm = np .linalg .norm (q * np .diag (r ), axis = 0 )
92- self .beta = (self .norm [1 :] / self .norm [:n ]) ** 2
93-
94- def transform (self , x , degree = 3 , raw = False ):
83+ self .scaler = args ['scaler' ]
84+ self .raw = args ['raw' ]
85+ self .polytype = args ['polytype' ]
86+
87+ if self .scaler is not None :
88+ raw_poly = self .vander (scores , n , self .polytype )
89+
90+ if self .scaler == 'qr' :
91+ self .alpha , self .norm , self .beta = self .gen_qr (raw_poly , n )
92+
93+ if self .scaler == 'standardize' :
94+ self .mean , self .var = self .gen_standardize (raw_poly )
95+
96+ def transform (self , x , degree = 3 , polytype = 'poly' , raw = False , scaler = None ):
9597 if have_pandas :
9698 if isinstance (x , (pandas .Series , pandas .DataFrame )):
9799 to_pandas = True
@@ -102,28 +104,75 @@ def transform(self, x, degree=3, raw=False):
102104 to_pandas = False
103105 x = np .array (x , ndmin = 1 ).flatten ()
104106
105- if self .raw :
106- n = self .degree
107- p = x .reshape ((- 1 , 1 )) ** np .arange (n + 1 ).reshape ((1 , - 1 ))
108- else :
109- # This is where the three-term recurrance technique is unwound.
107+ n = self .degree
108+ p = self .vander (x , n , self .polytype )
110109
111- p = np . empty (( x . shape [ 0 ], self .degree + 1 ))
112- p [:, 0 ] = 1
110+ if self .scaler == 'qr' :
111+ p = self . apply_qr ( p , n , self . alpha , self . norm , self . beta )
113112
114- for i in np .arange (self .degree ):
115- p [:, i + 1 ] = (x - self .alpha [i ]) * p [:, i ]
116- if i > 0 :
117- p [:, i + 1 ] = (p [:, i + 1 ] -
118- (self .beta [i - 1 ] * p [:, i - 1 ]))
119- p /= self .norm
113+ if self .scaler == 'standardize' :
114+ p = self .apply_standardize (p , self .mean , self .var )
120115
121116 p = p [:, 1 :]
122117 if to_pandas :
123118 p = pandas .DataFrame (p )
124119 p .index = idx
125120 return p
126121
122+ @staticmethod
123+ def vander (x , n , polytype ):
124+ v_func = {'poly' : np .polynomial .polynomial .polyvander ,
125+ 'cheb' : np .polynomial .chebyshev .chebvander ,
126+ 'legendre' : np .polynomial .legendre .legvander ,
127+ 'laguerre' : np .polynomial .laguerre .lagvander ,
128+ 'hermite' : np .polynomial .hermite .hermvander ,
129+ 'hermite_e' : np .polynomial .hermite_e .hermevander }
130+ raw_poly = v_func [polytype ](x , n )
131+ return raw_poly
132+
133+ @staticmethod
134+ def gen_qr (raw_poly , n ):
135+ # Q is now orthogonal of degree n. To match what R is doing, we
136+ # need to use the three-term recurrence technique to calculate
137+ # new alpha, beta, and norm.
138+ x = raw_poly [:, 1 ]
139+ q , r = np .linalg .qr (raw_poly )
140+ alpha = (np .sum (x .reshape ((- 1 , 1 )) * q [:, :n ] ** 2 , axis = 0 ) /
141+ np .sum (q [:, :n ] ** 2 , axis = 0 ))
142+
143+ # For reasons I don't understand, the norms R uses are based off
144+ # of the diagonal of the r upper triangular matrix.
145+
146+ norm = np .linalg .norm (q * np .diag (r ), axis = 0 )
147+ beta = (norm [1 :] / norm [:n ]) ** 2
148+ return alpha , norm , beta
149+
150+ @staticmethod
151+ def gen_standardize (raw_poly ):
152+ return raw_poly .mean (axis = 0 ), raw_poly .var (axis = 0 )
153+
154+ @staticmethod
155+ def apply_qr (x , n , alpha , norm , beta ):
156+ # This is where the three-term recurrence is unwound for the QR
157+ # decomposition.
158+ if np .ndim (x ) == 2 :
159+ x = x [:, 1 ]
160+ p = np .empty ((x .shape [0 ], n + 1 ))
161+ p [:, 0 ] = 1
162+
163+ for i in np .arange (n ):
164+ p [:, i + 1 ] = (x - alpha [i ]) * p [:, i ]
165+ if i > 0 :
166+ p [:, i + 1 ] = (p [:, i + 1 ] - (beta [i - 1 ] * p [:, i - 1 ]))
167+ p /= norm
168+ return p
169+
170+ @staticmethod
171+ def apply_standardize (x , mean , var ):
172+ x [:, 1 :] = ((x [:, 1 :] - mean [1 :]) / (var [1 :] ** 0.5 ))
173+ return x
174+
175+
127176 __getstate__ = no_pickling
128177
129178poly = stateful_transform (Poly )
@@ -166,6 +215,24 @@ def test_poly_compat():
166215 start_idx = stop_idx + 1
167216 assert tests_ran == R_poly_num_tests
168217
218+ def test_poly_smoke ():
219+ # Test that standardized values match.
220+ x = np .arange (27 )
221+ vanders = ['poly' , 'cheb' , 'legendre' , 'laguerre' , 'hermite' , 'hermite_e' ]
222+ scalers = ['raw' , 'qr' , 'standardize' ]
223+ for v in vanders :
224+ p1 = poly (x , polytype = v , scaler = 'standardize' )
225+ p2 = poly (x , polytype = v , raw = True )
226+ p2 = (p2 - p2 .mean (axis = 0 )) / p2 .std (axis = 0 )
227+ np .testing .assert_allclose (p1 , p2 )
228+
229+ # Don't have tests for all this... so just make sure it works.
230+ for v in vanders :
231+ for s in scalers :
232+ if s == 'raw' :
233+ poly (x , raw = True , polytype = v )
234+ else :
235+ poly (x , scaler = s , polytype = v )
169236
170237def test_poly_errors ():
171238 from nose .tools import assert_raises
@@ -177,3 +244,9 @@ def test_poly_errors():
177244 assert_raises (ValueError , poly , x , degree = - 1 )
178245 assert_raises (ValueError , poly , x , degree = 0 )
179246 assert_raises (ValueError , poly , x , degree = 3.5 )
247+
248+ #Invalid Poly Type
249+ assert_raises (KeyError , poly , x , polytype = 'foo' )
250+
251+ #Invalid scaling type
252+ assert_raises (ValueError , poly , x , scaler = 'bar' )
0 commit comments