2121}
2222
2323
24- def getCombinedScore (df , score_col = 'score' , pvalue_col = 'pvalue' , ctrl_label = 'negative_control' ):
24+ def getCombinedScore (df_in , score_col = 'score' , pvalue_col = 'pvalue' , target_col = 'target ' , ctrl_label = 'negative_control' ):
2525 """
2626 Calculate the combined score column based on the given phenotypic scores and p-values.
2727 Combined score is calculated as:
2828
2929 $combined\_score = \frac{score}{pseudo\_sd} \times -\log_{10}(pvalue)$
3030
3131 Parameters:
32- df (pandas.DataFrame): The input DataFrame.
32+ df_in (pandas.DataFrame): The input DataFrame.
3333 score_col (str): The column name for the individual scores. Default is 'score'.
3434 pvalue_col (str): The column name for the p-values. Default is 'pvalue'.
3535 target_col (str): The column name for the target variable. Default is 'target'.
@@ -39,18 +39,22 @@ def getCombinedScore(df, score_col='score', pvalue_col='pvalue', ctrl_label='neg
3939 Returns:
4040 pandas.Series: The calculated combined score column.
4141 """
42- if 'target' not in df .columns :
43- raise ValueError ('Column "target" not found in the input DataFrame.' )
42+ # make a copy of input dataframe
43+ df = df_in .copy ()
44+
45+ for col in [score_col , pvalue_col , target_col ]:
46+ if col not in df .columns :
47+ raise ValueError (f'Column "{ col } " not found in the input DataFrame.' )
4448
4549 # calculate pseudo_sd
46- pseudo_sd = df [df ['target' ].eq (ctrl_label )][score_col ].tolist ()
50+ pseudo_sd = df [df [target_col ].eq (ctrl_label )][score_col ].tolist ()
4751 pseudo_sd = np .std (pseudo_sd )
4852
4953 # calculate combined score
5054 return df [score_col ]/ pseudo_sd * - np .log10 (df [pvalue_col ])
5155
5256
53- def annotateScoreTable (df_in , up_hit , down_hit , threshold , score_col = None , pvalue_col = None , ctrl_label = 'negative_control' ):
57+ def annotateScoreTable (df_in , up_hit , down_hit , threshold , score_col = 'score' , pvalue_col = 'pvalue' , target_col = 'target' , ctrl_label = 'negative_control' ):
5458 """
5559 Annotate the given score table
5660
@@ -60,49 +64,47 @@ def annotateScoreTable(df_in, up_hit, down_hit, threshold, score_col=None, pvalu
6064 up_hit (str): up hit label
6165 down_hit (str): down hit label
6266 threshold (int): threshold value
63- score_col (str): score column name
64- pvalue_col (str): pvalue column name
65- ctrl_label (str): control label value
67+ score_col (str): score column name. Default is 'score'.
68+ target_col (str): column name for the target variable. Default is 'target'.
69+ pvalue_col (str): pvalue column name. Default is 'pvalue'.
70+ ctrl_label (str): control label value. Default is 'negative_control'.
6671
6772 Returns:
6873 pd.DataFrame: annotated score dataframe
6974 """
70- if score_col is None : score_col = 'score'
71- if pvalue_col is None : pvalue_col = 'pvalue'
75+ # make a copy of input dataframe
76+ df = df_in . copy ()
7277
73- sel = ['target' ,score_col , pvalue_col ]
74-
75- for col in sel :
76- if col not in df_in .columns :
78+ for col in [score_col , pvalue_col , target_col ]:
79+ if col not in df .columns :
7780 raise ValueError (f'Column "{ col } " not found in the input DataFrame.' )
78-
79- # make a copy of input dataframe
80- df = df_in [sel ].copy ()
81- # # rename/reformat columns
82- # df.columns = ['target', 'score', 'pvalue']
81+
8382 df [score_col ] = df [score_col ].astype (float )
8483 df [pvalue_col ] = df [pvalue_col ].astype (float )
8584
8685 # add combined score column
87- df ['combined_score' ] = getCombinedScore (df , score_col , pvalue_col , ctrl_label )
86+ df ['combined_score' ] = getCombinedScore (
87+ df ,
88+ score_col = score_col , pvalue_col = pvalue_col , target_col = target_col ,
89+ ctrl_label = ctrl_label )
8890
8991 # add label column
9092 df ['label' ] = '.'
9193
9294 # annotate hits: up
9395 df .loc [
94- (df [score_col ] > 0 ) & (~ df ['target' ].eq (ctrl_label )) &
96+ (df [score_col ] > 0 ) & (~ df [target_col ].eq (ctrl_label )) &
9597 (df ['combined_score' ] >= threshold ), 'label'
9698 ] = up_hit
9799
98100 # annotate hits: down
99101 df .loc [
100- (df [score_col ] < 0 ) & (~ df ['target' ].eq (ctrl_label )) &
102+ (df [score_col ] < 0 ) & (~ df [target_col ].eq (ctrl_label )) &
101103 (df ['combined_score' ] <= - threshold ), 'label'
102104 ] = down_hit
103105
104106 # annotate control
105- df .loc [df ['target' ].eq (ctrl_label ), 'label' ] = ctrl_label
107+ df .loc [df [target_col ].eq (ctrl_label ), 'label' ] = ctrl_label
106108
107109 # annotate non-hit
108110 df .loc [df ['label' ] == '.' , 'label' ] = 'target_non_hit'
0 commit comments