Skip to content

Commit 1b47fb9

Browse files
authored
Merge pull request #92 from ArcInstitute/dev
minor bug fixes
2 parents c77a166 + a4990aa commit 1b47fb9

File tree

5 files changed

+48
-14
lines changed

5 files changed

+48
-14
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,8 @@ The first step in analyzing CRISPR screens with deep sequencing readouts is to p
185185

186186
</details>
187187

188+
<br>
189+
188190
### Step 2: Phenotype calculation
189191

190192
Once you have the counts, you can use the ScreenPro2 `phenoscore` and `phenostats` modules to calculate the phenotype scores and statistics between screen arms.
@@ -298,6 +300,8 @@ Once you have the counts, you can use ScreenPro2 `phenoscore` and `phenostats` m
298300
Last but not least, ScreenPro2 runs faster than ScreenProcessing (thanks to [biobear](https://github.com/wheretrue/biobear)) for processing FASTQ files. -->
299301

300302

303+
<br>
304+
301305
### Step 3: Data visualization
302306

303307
Once the phenotypes are calculated, you can extract and explore the results using the `.phenotypes` attribute of the `PooledScreens` object. Built-in functionality for visualizing the results is currently very limited, but we are working on adding more features to make this easier for users. In the meantime, you can easily extract the results and use other libraries such as `seaborn` and `matplotlib` in Python or `ggplot2` in R to visualize them.

screenpro/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,6 @@
3131
from .dashboard import DrugScreenDashboard
3232

3333

34-
__version__ = "0.4.11"
34+
__version__ = "0.4.12"
3535
__author__ = "Abe Arab"
3636

screenpro/assays/__init__.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -332,11 +332,14 @@ def buildPhenotypeData(self, run_name='auto',db_rate_col='pop_doubling', **kwarg
332332
untreated = self.phenotypes[run_name]['config']['untreated']
333333
treated = self.phenotypes[run_name]['config']['treated']
334334

335-
#TODO: fix `_calculateGrowthFactor` and `_getTreatmentDoublingRate`
336-
growth_factor_table = self._calculateGrowthFactor(
337-
untreated = untreated, treated = treated,
338-
db_rate_col = db_rate_col
339-
)
335+
if type(treated) != list: treated = [treated]
336+
337+
if db_rate_col:
338+
#TODO: fix `_calculateGrowthFactor` and `_getTreatmentDoublingRate`
339+
growth_factor_table = self._calculateGrowthFactor(
340+
untreated = untreated, treated = treated,
341+
db_rate_col = db_rate_col
342+
)
340343

341344
pdata_list = []
342345

@@ -345,9 +348,12 @@ def buildPhenotypeData(self, run_name='auto',db_rate_col='pop_doubling', **kwarg
345348
score_tag, comparison = phenotype_name.split(':')
346349
cond_test, cond_ref = comparison.split('_vs_')
347350

348-
growth_rate_reps=growth_factor_table.query(
349-
f'score=="{score_tag}"'
350-
).set_index('replicate')['growth_factor'].to_dict()
351+
if db_rate_col:
352+
growth_rate_reps=growth_factor_table.query(
353+
f'score=="{score_tag}"'
354+
).set_index('replicate')['growth_factor'].to_dict()
355+
else:
356+
growth_rate_reps=None
351357

352358
pdata = getPhenotypeData(
353359
self.adata, score_tag=score_tag,
@@ -385,7 +391,7 @@ def drawVolcano(
385391

386392
score_tag, _ = phenotype_name.split(':')
387393

388-
df = self.phenotypes[run_name]['results'][phenotype_name]
394+
df = self.phenotypes[run_name]['results'][phenotype_name].dropna()
389395

390396
df = annotateScoreTable(
391397
df,

screenpro/phenoscore/delta.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
def compareByReplicates(adata, df_cond_ref, df_cond_test, var_names='target', test='ttest', ctrl_label='negative_control', growth_rate=1, filter_type='mean', filter_threshold=40):
1616
"""Calculate phenotype score and p-values comparing `cond_test` vs `cond_ref`.
1717
18+
In this function, the phenotype calculation is done by comparing multiple replicates of `cond_test` vs `cond_ref`.
19+
1820
Args:
1921
adata (AnnData): AnnData object
2022
df_cond_ref (pd.DataFrame): dataframe of condition reference
@@ -43,8 +45,8 @@ def compareByReplicates(adata, df_cond_ref, df_cond_test, var_names='target', te
4345
y = df_cond_test.to_numpy()
4446

4547
# get control values
46-
x_ctrl = df_cond_ref[adat.var.targetType.eq(ctrl_label)].to_numpy()
47-
y_ctrl = df_cond_test[adat.var.targetType.eq(ctrl_label)].to_numpy()
48+
x_ctrl = df_cond_ref[adat.var.targetType.eq(ctrl_label)].dropna().to_numpy()
49+
y_ctrl = df_cond_test[adat.var.targetType.eq(ctrl_label)].dropna().to_numpy()
4850

4951
# calculate phenotype scores
5052
scores = calculateDelta(
@@ -79,6 +81,27 @@ def compareByReplicates(adata, df_cond_ref, df_cond_test, var_names='target', te
7981

8082

8183
def compareByTargetGroup(adata, df_cond_ref, df_cond_test, keep_top_n, var_names='target', test='ttest', ctrl_label='negative_control', growth_rate=1, filter_type='mean', filter_threshold=40):
84+
"""Calculate phenotype score and p-values comparing `cond_test` vs `cond_ref`.
85+
86+
In this function, the phenotype calculation is done by comparing groups of
87+
guide elements (e.g. sgRNAs) that target the same gene or groups of pseudogenes (i.e.
88+
subsampled groups of non-targeting control elements) between `cond_test` vs `cond_ref`.
89+
90+
Args:
91+
adata (AnnData): AnnData object
92+
df_cond_ref (pd.DataFrame): dataframe of condition reference
93+
df_cond_test (pd.DataFrame): dataframe of condition test
94+
keep_top_n (int): number of top guide elements to keep
95+
var_names (str): variable names to use as index in the result dataframe
96+
test (str): test to use for calculating p-value ('MW': Mann-Whitney U rank; 'ttest' : t-test)
97+
ctrl_label (str): control label, default is 'negative_control'
98+
growth_rate (int): growth rate
99+
filter_type (str): filter type to apply to low counts ('mean', 'both', 'either')
100+
filter_threshold (int): filter threshold for low counts (default is 40)
101+
102+
Returns:
103+
pd.DataFrame: result dataframe
104+
"""
82105

83106
adat = adata.copy()
84107

screenpro/plotting/_rank.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from ._utils import yellow_blue
44

55

6-
def rank_plot(df, rank_col, color_col=None, name_col='target', highlight_values_dict=None, xlabel='Rank', ylabel='Values', title='Rank Plot', ax=None, dot_size=1.5, highlight_size_factor=100, **args):
6+
def rank_plot(df, rank_col, color_col=None, name_col='target', highlight_values_dict=None, xlabel='Rank', ylabel='Values', title='Rank Plot', ax=None, dot_size=1.5, highlight_size_factor=100, txt_font_size=8, **args):
77
"""
88
Plot the ranks against their values with specified color.
99
@@ -22,6 +22,7 @@ def rank_plot(df, rank_col, color_col=None, name_col='target', highlight_values_
2222
ax (matplotlib.axes.Axes, optional): The axis object to plot on. If not provided, a new axis will be created.
2323
dot_size (float, optional): The size of the dots in the scatter plot. Default is 1.5.
2424
highlight_size_factor (int, optional): The size factor for the highlighted dots. Default is 100.
25+
txt_font_size (int, optional): The font size for the text labels. Default is 8.
2526
**args: Additional keyword arguments to be passed to the scatter plot.
2627
2728
Returns:
@@ -57,7 +58,7 @@ def rank_plot(df, rank_col, color_col=None, name_col='target', highlight_values_
5758

5859
if highlight_values['text'] is not False:
5960
for i, row in highlight_ranks.iterrows():
60-
ax.text(row['Rank'] + .01, row[rank_col] + .001, row[name_col], fontsize=8, color=highlight_color, ha='right')
61+
ax.text(row['Rank'] + .01, row[rank_col] + .001, row[name_col], fontsize=txt_font_size, color=highlight_color, ha='right')
6162

6263
# Add labels and title
6364
ax.set_xlabel(xlabel)

0 commit comments

Comments
 (0)