"""Example client for the Ubigen web API.

Posts a set of gene IDs to the ``summary`` and ``ranking`` endpoints and
prints a few example statistics derived from the responses.
"""

from io import StringIO
import json

import pandas
import requests

API_BASE_URL = "https://ubigen.uni-rostock.de/api"

# Without a timeout, requests waits forever on an unresponsive server.
REQUEST_TIMEOUT_SECONDS = 60

# Genes of interest (in this case, genes involved in glycolysis according to
# the KEGG pathways database [KEGG:hsa00010+M00001]).
genes = [
    "ENSG00000111640",
    "ENSG00000111669",
    "ENSG00000149925",
    "ENSG00000074800",
    "ENSG00000105220",
    "ENSG00000067225",
    "ENSG00000102144",
    "ENSG00000141959",
    "ENSG00000156515",
    "ENSG00000171314",
    "ENSG00000067057",
    "ENSG00000111674",
    "ENSG00000159322",
    "ENSG00000152556",
    "ENSG00000109107",
    "ENSG00000159399",
    "ENSG00000108515",
    "ENSG00000160883",
    "ENSG00000226784",
    "ENSG00000188316",
    "ENSG00000106633",
    "ENSG00000136872",
    "ENSG00000156510",
    "ENSG00000143627",
    "ENSG00000170950",
]

# Both endpoints take the same plain-text body: the gene IDs separated by
# single spaces. Build it (and the headers) once and reuse them.
request_headers = {"Content-Type": "text/plain"}
request_body = " ".join(genes)

# Get a summary on the ubiquity of the above gene set:
response_summary = requests.post(
    f"{API_BASE_URL}/summary",
    headers=request_headers,
    data=request_body,
    timeout=REQUEST_TIMEOUT_SECONDS,
)
# Fail with a clear HTTP error instead of an obscure JSON/KeyError later on.
response_summary.raise_for_status()

summary = json.loads(response_summary.content)

median_percentile = summary["median_percentile"]
median_percentile_rounded = round(median_percentile, 3)
estimated_change = summary["change"]
p_value = summary["p_value"]

# Example information: Median percentile of the genes and p-value for
# difference in scores in comparison with other genes.
# The "%" format type scales by 100 and rounds for display, which avoids
# float artifacts such as "56.699999999999996%" from multiplying manually.
print(f"Median percentile: {median_percentile_rounded:.1%}")
print(f"Estimated score difference: {estimated_change} (p = {p_value})")

# Retrieve detailed information on all parameters and the ranking of the gene
# set defined above:
response_ranking = requests.post(
    f"{API_BASE_URL}/ranking",
    headers=request_headers,
    data=request_body,
    timeout=REQUEST_TIMEOUT_SECONDS,
)
# Do not hand an HTTP error page to the CSV parser.
response_ranking.raise_for_status()

# Parse the data using pandas. This gives lots of opportunity for further
# analyses.
data = pandas.read_csv(StringIO(response_ranking.text))

# Example analysis: Recompute the median percentile using pandas.
median_percentile_new = data["percentile"].median()
median_percentile_new_rounded = round(median_percentile_new, 3)
print(f"Recomputed median percentile: {median_percentile_new_rounded:.1%}")