Skip to content

Commit 7777e1b

Browse files
authored
Merge pull request #95 from ArcInstitute/dev
minor fixes
2 parents 5a54c5d + 7857245 commit 7777e1b

File tree

6 files changed

+54
-32
lines changed

6 files changed

+54
-32
lines changed

docs/environment.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,13 @@ dependencies:
2222
- sphinx=5.3.0
2323
- sphinx_rtd_theme=1.1.1
2424
- sphinxcontrib-bibtex
25+
- polars>0.20
2526
- pip
2627
- pip:
27-
- polars
28+
- pyarrow
2829
- biobear
2930
- numba
3031
- pydeseq2
3132
- simple_colors
33+
- adjustText
3234
- watermark

environment.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,13 @@ dependencies:
1818
- ipykernel
1919
- mscorefonts
2020
- rust>=1.72
21+
- polars>0.20
2122
- pip
2223
- pip:
23-
- polars
2424
- pyarrow
2525
- biobear
2626
- numba
2727
- pydeseq2
2828
- simple_colors
29+
- adjustText
2930
- watermark

screenpro/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,6 @@
3131
from .dashboard import DrugScreenDashboard
3232

3333

34-
__version__ = "0.4.13"
34+
__version__ = "0.4.14"
3535
__author__ = "Abe Arab"
3636

screenpro/phenoscore/_annotate.py

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,15 @@
2121
}
2222

2323

24-
def getCombinedScore(df, score_col='score', pvalue_col='pvalue', ctrl_label='negative_control'):
24+
def getCombinedScore(df_in, score_col='score', pvalue_col='pvalue', target_col='target', ctrl_label='negative_control'):
2525
"""
2626
Calculate the combined score column based on the given phenotypic scores and p-values.
2727
Combined score is calculated as:
2828
2929
$combined\_score = \frac{score}{pseudo\_sd} \times -\log_{10}(pvalue)$
3030
3131
Parameters:
32-
df (pandas.DataFrame): The input DataFrame.
32+
df_in (pandas.DataFrame): The input DataFrame.
3333
score_col (str): The column name for the individual scores. Default is 'score'.
3434
pvalue_col (str): The column name for the p-values. Default is 'pvalue'.
3535
target_col (str): The column name for the target variable. Default is 'target'.
@@ -39,18 +39,22 @@ def getCombinedScore(df, score_col='score', pvalue_col='pvalue', ctrl_label='neg
3939
Returns:
4040
pandas.Series: The calculated combined score column.
4141
"""
42-
if 'target' not in df.columns:
43-
raise ValueError('Column "target" not found in the input DataFrame.')
42+
# make a copy of input dataframe
43+
df = df_in.copy()
44+
45+
for col in [score_col, pvalue_col, target_col]:
46+
if col not in df.columns:
47+
raise ValueError(f'Column "{col}" not found in the input DataFrame.')
4448

4549
# calculate pseudo_sd
46-
pseudo_sd = df[df['target'].eq(ctrl_label)][score_col].tolist()
50+
pseudo_sd = df[df[target_col].eq(ctrl_label)][score_col].tolist()
4751
pseudo_sd = np.std(pseudo_sd)
4852

4953
# calculate combined score
5054
return df[score_col]/pseudo_sd * -np.log10(df[pvalue_col])
5155

5256

53-
def annotateScoreTable(df_in, up_hit, down_hit, threshold, score_col=None, pvalue_col=None, ctrl_label='negative_control'):
57+
def annotateScoreTable(df_in, up_hit, down_hit, threshold, score_col='score', pvalue_col='pvalue', target_col='target', ctrl_label='negative_control'):
5458
"""
5559
Annotate the given score tabel
5660
@@ -60,49 +64,47 @@ def annotateScoreTable(df_in, up_hit, down_hit, threshold, score_col=None, pvalu
6064
up_hit (str): up hit label
6165
down_hit (str): down hit label
6266
threshold (int): threshold value
63-
score_col (str): score column name
64-
pvalue_col (str): pvalue column name
65-
ctrl_label (str): control label value
67+
score_col (str): score column name. Default is 'score'.
68+
target_col (str): column name for the target variable. Default is 'target'.
69+
pvalue_col (str): pvalue column name. Default is 'pvalue'.
70+
ctrl_label (str): control label value. Default is 'negative_control'.
6671
6772
Returns:
6873
pd.DataFrame: annotated score dataframe
6974
"""
70-
if score_col is None: score_col = 'score'
71-
if pvalue_col is None: pvalue_col = 'pvalue'
75+
# make a copy of input dataframe
76+
df = df_in.copy()
7277

73-
sel = ['target',score_col, pvalue_col]
74-
75-
for col in sel:
76-
if col not in df_in.columns:
78+
for col in [score_col, pvalue_col, target_col]:
79+
if col not in df.columns:
7780
raise ValueError(f'Column "{col}" not found in the input DataFrame.')
78-
79-
# make a copy of input dataframe
80-
df = df_in[sel].copy()
81-
# # rename/reformat columns
82-
# df.columns = ['target', 'score', 'pvalue']
81+
8382
df[score_col] = df[score_col].astype(float)
8483
df[pvalue_col] = df[pvalue_col].astype(float)
8584

8685
# add combined score column
87-
df['combined_score'] = getCombinedScore(df, score_col, pvalue_col, ctrl_label)
86+
df['combined_score'] = getCombinedScore(
87+
df,
88+
score_col=score_col, pvalue_col=pvalue_col, target_col=target_col,
89+
ctrl_label=ctrl_label)
8890

8991
# add label column
9092
df['label'] = '.'
9193

9294
# annotate hits: up
9395
df.loc[
94-
(df[score_col] > 0) & (~df['target'].eq(ctrl_label)) &
96+
(df[score_col] > 0) & (~df[target_col].eq(ctrl_label)) &
9597
(df['combined_score'] >= threshold), 'label'
9698
] = up_hit
9799

98100
# annotate hits: down
99101
df.loc[
100-
(df[score_col] < 0) & (~df['target'].eq(ctrl_label)) &
102+
(df[score_col] < 0) & (~df[target_col].eq(ctrl_label)) &
101103
(df['combined_score'] <= -threshold), 'label'
102104
] = down_hit
103105

104106
# annotate control
105-
df.loc[df['target'].eq(ctrl_label), 'label'] = ctrl_label
107+
df.loc[df[target_col].eq(ctrl_label), 'label'] = ctrl_label
106108

107109
# annotate non-hit
108110
df.loc[df['label'] == '.', 'label'] = 'target_non_hit'

screenpro/phenoscore/delta.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -153,10 +153,10 @@ def compareByTargetGroup(adata, df_cond_ref, df_cond_test, keep_top_n, var_names
153153

154154
# combine results into a dataframe
155155
result = pd.concat([
156-
pd.Series(scores, name='score'),
157-
pd.Series(p_values, name=f'{test} pvalue'),
158-
pd.Series(adj_p_values, name='BH adj_pvalue'),
159-
pd.Series(target_sizes, name='number_of_guide_elements'),
156+
pd.Series(scores, name='score', dtype=float),
157+
pd.Series(p_values, name=f'{test} pvalue', dtype=float),
158+
pd.Series(adj_p_values, name='BH adj_pvalue', dtype=float),
159+
pd.Series(target_sizes, name='number_of_guide_elements', dtype=int),
160160
], axis=1)
161161

162162
# add targets information

screenpro/plotting/_rank.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import pandas as pd
22
import matplotlib.pyplot as plt
3+
4+
from adjustText import adjust_text
35
from ._utils import yellow_blue
46

57

@@ -57,8 +59,23 @@ def rank_plot(df, rank_col, color_col=None, name_col='target', highlight_values_
5759
ax.plot(highlight_ranks['Rank'], highlight_ranks[rank_col], 'o', color=highlight_color, markersize=dot_size * highlight_size_factor)
5860

5961
if highlight_values['text'] is not False:
62+
texts = []
6063
for i, row in highlight_ranks.iterrows():
61-
ax.text(row['Rank'] + .01, row[rank_col] + .001, row[name_col], fontsize=txt_font_size, color=highlight_color, ha='right')
64+
t = ax.text(
65+
row['Rank'] + .01,
66+
row[rank_col] + .001,
67+
row[name_col],
68+
fontsize=txt_font_size,
69+
color=highlight_color,
70+
ha='right'
71+
)
72+
texts.append(t)
73+
74+
adjust_text(
75+
texts,
76+
arrowprops=dict(arrowstyle='-', color=highlight_color, lw=0.5),
77+
ax=ax
78+
)
6279

6380
# Add labels and title
6481
ax.set_xlabel(xlabel)

0 commit comments

Comments
 (0)