代谢物分类一直没有合适的参考

从 KEGG 下载 BRITE 层级文件 br08021 的 JSON 版本,读取后整理成表格。这里没有逐层对 list 进行操作,而是利用展开后行名与取值的规律,处理多层嵌套 list 的数据结构。

library(rlist)
library(dplyr)
library(tidyverse)
library(data.table)

# Flatten the KEGG BRITE JSON (br08021.json) into a table that maps every
# compound entry to its top-level category, then cache the result as an RDS.
#
# NOTE(review): the earlier `rm(list = ls())` was removed. Every object used
# below is assigned in this section, so clearing the caller's workspace is not
# needed and would silently destroy unrelated work.

# Only the `children` element of the parsed JSON is used from here on.
KEGG_Category <- list.load("br08021.json")[["children"]]

# Flatten the nested list and transpose it. The rest of this section relies on
# the result having a single value column `V1`, with the flattened element
# names as row names.
df <- as.data.frame(t(as.data.frame(KEGG_Category)))
df$num <- seq_len(nrow(df))

# Keep two kinds of rows, in their original order:
#   * rows whose row name starts with "name" (used as category headers);
#   * rows whose value starts with "C" followed by digits (used as compounds).
# Logical subsetting keeps both the order and the row names, so no re-sorting
# step is required.
is_class_row <- grepl("^name", rownames(df))
is_compound_row <- grepl("^C\\d+", df$V1)
df1 <- df[is_class_row | is_compound_row, , drop = FALSE]
df1$num <- seq_len(nrow(df1))

# Number of rows in each category block: the distance between consecutive
# header rows (the header row itself is included in the count).
class_rows <- grep("^name", rownames(df1))
num <- diff(c(class_rows, nrow(df1) + 1L))

# Category labels are read from the same rows that `num` was computed on, so
# the two vectors are aligned by construction.
class <- df1$V1[class_rows]

# Repeat every category label across its own block of rows.
c1 <- rep(class, times = num)

# Fail loudly instead of letting cbind() recycle labels, e.g. when compound
# rows appear before the first category header.
stopifnot(length(c1) == nrow(df1))

d1 <- cbind(df1, c1)

# Drop the header rows; only compound entries are saved.
d2 <- d1[grepl("^C\\d+", d1$V1), , drop = FALSE]
saveRDS(d2, file = "br08021.rds")
# 
# Pie chart of the number of compounds per top-level category.
#
# NOTE(review): the counts below are hard-coded; the column names suggest they
# were copied from a `table()` result -- TODO confirm and regenerate if the
# KEGG file is updated.
data <- data.frame(
  Var1 = c(
    "Antibiotics", "Carbohydrates", "Hormones and transmitters", "Lipids",
    "Nucleic acids", "Organic acids", "Peptides", "Steroids",
    "Vitamins and cofactors"
  ),
  Freq = c(51, 349, 82, 3596, 51, 40, 192, 86, 44)
)

# One fill colour per category. The script previously passed an undefined
# `colors` object, which resolves to the function grDevices::colors and makes
# scale_fill_manual() fail. Naming the vector ties each colour to its category.
class_colors <- setNames(
  hcl.colors(nrow(data), palette = "Dark 3"),
  data$Var1
)

# A single stacked bar wrapped into polar coordinates gives the pie chart.
ggplot(data, aes(x = "", y = Freq, fill = Var1)) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  theme_void() +
  scale_fill_manual(values = class_colors) +
  labs(title = "Compounds Classification", x = NULL, y = NULL) +
  theme(legend.title = element_blank())

```{r, fig.height = 8, fig.width = 6}
plot(1:10)
```