Python
完成品
以下のようなVolcano plotをPythonで書いていきます。
Script
以下のスクリプトをコピー＆ペーストして実行すると、上記のグラフが表示されます。
import pandas as pd
# Fetch the sample CSV over HTTPS and load it into the DataFrame `c`.
url = "https://raw.githubusercontent.com/vappiah/bioinfoscripts/main/testvolcano.csv"
c = pd.read_csv(url)
def volcanoplot(de, foldchange, pvalue, featurename, label=10,
                fc_threshold=1, p_threshold=0.1):
    """Draw a volcano plot and annotate the most significant features.

    Rows are sorted by ``pvalue``, classified as ``'UP'`` / ``'DOWN'`` /
    ``'NaN'`` from the fold-change and p-value cut-offs, and drawn as a
    scatter plot of fold change against ``-log10(p)``. The first ``label``
    rows of the UP group and of the DOWN group (i.e. those with the
    smallest p-values) are annotated with their feature name.

    Args:
        de: pandas DataFrame holding the columns named below. It is not
            modified; the function works on a sorted copy.
        foldchange: Name of the (log) fold-change column (x axis).
        pvalue: Name of the p-value (or q-value) column.
        featurename: Name of the column holding the text used for labels.
        label: Maximum number of UP rows and of DOWN rows to annotate.
        fc_threshold: A row is UP when fold change > ``fc_threshold`` and
            DOWN when fold change < ``-fc_threshold``.
        p_threshold: A row is UP/DOWN only when its p-value is below this.

    Returns:
        The matplotlib Axes returned by ``seaborn.scatterplot``.
    """
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns
    from adjustText import adjust_text

    # sort_values returns a new frame, so the columns added below never
    # leak into the caller's DataFrame.
    de = de.sort_values(pvalue)
    de["logpvalue"] = np.log10(de[pvalue]) * -1

    significant = de[pvalue] < p_threshold
    conditions = [
        (de[foldchange] > fc_threshold) & significant,
        (de[foldchange] < -fc_threshold) & significant,
    ]
    # The string 'NaN' (not a float NaN) marks rows that are neither UP nor
    # DOWN; it is also the legend text and a palette key.
    de["diffexpressed"] = np.select(conditions, ["UP", "DOWN"], default="NaN")

    # The frame is already sorted by p-value, so the first `label` rows of
    # each group are that group's most significant features.
    up = de[de["diffexpressed"] == "UP"].iloc[0:label]
    down = de[de["diffexpressed"] == "DOWN"].iloc[0:label]
    # Series.append was removed in pandas 2.0; pd.concat is the replacement.
    topfeature = pd.concat([up[featurename], down[featurename]])

    palette = {"UP": "red", "DOWN": "blue", "NaN": "black"}
    # `marker` (singular) is forwarded to matplotlib; `markers` only applies
    # together with a `style` mapping and was silently ignored here.
    ax = sns.scatterplot(
        data=de,
        x=foldchange,
        y="logpvalue",
        hue="diffexpressed",
        palette=palette,
        marker="o",
        s=10,
    )

    labelled = de[de[featurename].isin(topfeature)]
    texts = [
        ax.text(x, y, s)
        for x, y, s in zip(
            labelled[foldchange], labelled["logpvalue"], labelled[featurename]
        )
    ]
    # Nothing to de-overlap when no row qualified (or label == 0).
    if texts:
        adjust_text(texts)
    return ax
# volcanoplot is defined in this same script, so call it directly; no
# `myvolcanoplot` module is imported or defined anywhere above.
volcanoplot(c, "log2FC", "p-value", "GeneNames", label=5)
Script説明
データフレームのインポート
Volcanoplotとは直接関係ないですが、データは以下を使用しました。
https://github.com/vappiah/bioinfoscripts/blob/main/testvolcano.csv
pandasでCSVファイルを読み込み、データフレームcに格納します。
import pandas as pd
# Read the CSV at `url` into the DataFrame `c`.
url = "https://raw.githubusercontent.com/vappiah/bioinfoscripts/main/testvolcano.csv"
c = pd.read_csv(url)
| GeneNames | log2FC | p-value |
|---|---|---|
| LOC_Os09g01000.1 | -1.886539 | 1.250000e-55 |
| … | … | … |
Volcanoplotでは、以下のカラムが必要になります。
①遺伝子名
②Log Fold Change
③p-value またはq-value
必要なModuleをインポート
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from adjustText import adjust_text