REST API,全称Representational State Transfer API,是一种网络应用程序的设计风格和开发方式,它基于HTTP协议,可以使用XML或者JSON格式传输数据,使得用户可以通过编程语言轻松访问网络资源。换言之,用户可以简单的对这一API的网址发送一个请求,就可以以json文档或xml文档的形式获得所需数据,其远比本地构建数据库或使用网络爬虫要灵活得多。
## 基于ensemble RUST API的基因长度批量获取函数。 # 参考:https://rest.ensembl.org/documentation/info/lookup_post # 这里获取的是基因的总长度,即start-end,并非转录本长度。 defgetEnsembleGeneLength(ensembl_id_list,return_rawjson=False): # @param ensembl_id_list: A list of query ensembl ID # @param return_rawjson: if True, return raw response json;else, return a list of gene length # @return A list of gene length corresponding to the query ensembl ID list,or raw json url = f"https://rest.ensembl.org/lookup/id?expand=0" payload = { "ids" : ensembl_id_list } headers = { "Content-Type" : "application/json", "Accept" : "application/json"} response = requests.post(url, headers=headers, data=json.dumps(payload)) if response.status_code != 200: returnNone else: data = response.json() if(return_rawjson): return(data) gene_length_list = [] for ensembl_id in ensembl_id_list: if(ensembl_id in data): try: data1 = data[ensembl_id] gene_length_list.append(abs(data1["start"]-data1["end"])) except Exception as e: print(f"Warning: gene {ensembl_id} have no length info. message: {e}") gene_length_list.append(None) else: gene_length_list.append(None) return gene_length_list
## 基于ensemble RUST API的基因长度批量获取函数。 # 参考:https://rest.ensembl.org/documentation/info/lookup_post # 这里获取的是基因的transcript长度. import pandas as pd defgetEnsembleTransLength(ensembl_id_list,return_rawjson=False,return_transcript_df=False): # @param ensembl_id_list: A list of query ensembl ID # @param return_rawjson: if True, return raw response json;else, return a list of gene length # @param return_transcript_df: if True, return all genes' all transcripts' length as a list of dataframe; else, only return the length of the longest transcript of each gene # @return A list of gene length corresponding to the query ensembl ID list url = f"https://rest.ensembl.org/lookup/id?expand=1" payload = { "ids" : ensembl_id_list } headers = { "Content-Type" : "application/json", "Accept" : "application/json"} response = requests.post(url, headers=headers, data=json.dumps(payload)) if response.status_code != 200: return (None,None) else: data = response.json() if(return_rawjson): return(data) gene_length_list = [] max_transcript_length_list= [] transcript_length_df_list = [] for ensembl_id in ensembl_id_list: if(ensembl_id in data): try: data1 = data[ensembl_id] gene_length_list.append(abs(data1["start"]-data1["end"])) transcript_list = data1["Transcript"] transcript_id_list = [] transcript_length_list = [] for transcript in transcript_list: if("Exon"in transcript): transcript_id_list.append(transcript["id"]) exons = transcript["Exon"] transcript_length = 0 for exon in exons: transcript_length += abs(exon["start"]-exon["end"]) transcript_length_list.append(transcript_length) max_transcript_length_list.append(np.max(transcript_length_list)) if(return_transcript_df): df = pd.DataFrame({"id":transcript_id_list,"length":transcript_length_list}) #display(df) transcript_length_df_list.append(df) except Exception as e: print(f"Warning: gene {ensembl_id} have no transcript length info. message: {e}") gene_length_list.append(None) max_transcript_length_list.append(None) transcript_length_df_list.append(None) else: gene_length_list.append(None) max_transcript_length_list.append(None) transcript_length_df_list.append(None) if(return_transcript_df): return (gene_length_list,transcript_length_df_list) else: return (gene_length_list,max_transcript_length_list)