|
15 | 15 | def compareByReplicates(adata, df_cond_ref, df_cond_test, var_names='target', test='ttest', ctrl_label='negative_control', growth_rate=1, filter_type='mean', filter_threshold=40): |
16 | 16 | """Calculate phenotype score and p-values comparing `cond_test` vs `cond_ref`. |
17 | 17 |
|
| 18 | + In this function, the phenotype calculation is done by comparing multiple replicates of `cond_test` vs `cond_ref`. |
| 19 | +
|
18 | 20 | Args: |
19 | 21 | adata (AnnData): AnnData object |
20 | 22 | df_cond_ref (pd.DataFrame): dataframe of condition reference |
@@ -43,8 +45,8 @@ def compareByReplicates(adata, df_cond_ref, df_cond_test, var_names='target', te |
43 | 45 | y = df_cond_test.to_numpy() |
44 | 46 |
|
45 | 47 | # get control values |
46 | | - x_ctrl = df_cond_ref[adat.var.targetType.eq(ctrl_label)].to_numpy() |
47 | | - y_ctrl = df_cond_test[adat.var.targetType.eq(ctrl_label)].to_numpy() |
| 48 | + x_ctrl = df_cond_ref[adat.var.targetType.eq(ctrl_label)].dropna().to_numpy() |
| 49 | + y_ctrl = df_cond_test[adat.var.targetType.eq(ctrl_label)].dropna().to_numpy() |
48 | 50 |
|
49 | 51 | # calculate phenotype scores |
50 | 52 | scores = calculateDelta( |
@@ -79,6 +81,27 @@ def compareByReplicates(adata, df_cond_ref, df_cond_test, var_names='target', te |
79 | 81 |
|
80 | 82 |
|
81 | 83 | def compareByTargetGroup(adata, df_cond_ref, df_cond_test, keep_top_n, var_names='target', test='ttest', ctrl_label='negative_control', growth_rate=1, filter_type='mean', filter_threshold=40): |
| 84 | + """Calculate phenotype score and p-values comparing `cond_test` vs `cond_ref`. |
| 85 | +
|
| 86 | + In this function, the phenotype calculation is done by comparing groups of |
| 87 | + guide elements (e.g. sgRNAs) that target the same gene or groups of pseudogenes (i.e. |
| 88 | + subsampled groups of non-targeting control elements) between `cond_test` vs `cond_ref`. |
| 89 | +
|
| 90 | + Args: |
| 91 | + adata (AnnData): AnnData object |
| 92 | + df_cond_ref (pd.DataFrame): dataframe of condition reference |
| 93 | + df_cond_test (pd.DataFrame): dataframe of condition test |
| 94 | + keep_top_n (int): number of top guide elements to keep |
| 95 | + var_names (str): variable names to use as index in the result dataframe |
| 96 | + test (str): test to use for calculating p-value ('MW': Mann-Whitney U rank; 'ttest' : t-test) |
| 97 | + ctrl_label (str): control label, default is 'negative_control' |
| 98 | + growth_rate (int): growth rate |
| 99 | + filter_type (str): filter type to apply to low counts ('mean', 'both', 'either') |
| 100 | + filter_threshold (int): filter threshold for low counts (default is 40) |
| 101 | + |
| 102 | + Returns: |
| 103 | + pd.DataFrame: result dataframe |
| 104 | + """ |
82 | 105 |
|
83 | 106 | adat = adata.copy() |
84 | 107 |
|
|
0 commit comments