#!/usr/bin/env python3
"""Find UCSC Genome Browser assemblies matching a search term.

Queries the UCSC REST API 'findGenome' endpoint and prints one genome
browser link per matching assembly.  See the 'findGenome' discussion in:
    https://genome.ucsc.edu/goldenPath/help/api.html
"""

import sys
from urllib.parse import quote

import requests

### ignore these metaData 'keys' at the top level of the JSON structure
META_KEYS = frozenset({
    "downloadTime", "downloadTimeStamp", "browser", "q", "elapsedTimeMs",
    "itemCount", "totalMatchCount", "availableAssemblies", "maxItemsLimit",
    "statusCode", "statusMessage", "error",
})

### without a timeout, requests.get() can block forever on a stalled connection
REQUEST_TIMEOUT_SECONDS = 30

### upper limit on the number of results requested from the API
MAX_ITEMS_OUT = 4


def fetchDataKeys(apiUrl):
    """Return the non-metadata top-level keys of the JSON found at apiUrl.

    Args:
        apiUrl: complete URL to request with HTTP GET.

    Returns:
        A list of the top-level keys of the JSON object in the response
        that are not listed in META_KEYS, in response order.  An empty
        list is returned, with a diagnostic printed to stderr, when the
        request fails, the body is not valid JSON, the HTTP status is not
        200, or the JSON document is not an object.
    """
    try:
        response = requests.get(apiUrl, timeout=REQUEST_TIMEOUT_SECONDS)
        ### parse before checking the status: error responses may carry
        ### an 'error' message in their JSON body
        try:
            data = response.json()
        except ValueError:
            print("Failed to parse JSON response.", file=sys.stderr)
            return []

        isObject = isinstance(data, dict)

        if response.status_code != 200:
            fallbackMsg = f"HTTP {response.status_code}"
            errorMsg = data.get("error", fallbackMsg) if isObject else fallbackMsg
            print(f"API error: {errorMsg}", file=sys.stderr)
            return []

        if not isObject:
            ### a JSON array or scalar has no top-level keys to report
            print("Unexpected JSON response: not an object.", file=sys.stderr)
            return []

        return [key for key in data if key not in META_KEYS]

    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}", file=sys.stderr)
        return []


def printUsage():
    """Print the usage message to stderr."""
    usageLines = (
        "Usage: findGenome.py <searchTerm>\n",
        "Using the UCSC API 'findGenome' function to search for an assembly",
        f"in the UCSC genome browser system. maxItemsOutput is set to {MAX_ITEMS_OUT}",
        "to limit the result output to four answers.\nExample::",
        "\t./findGenome.py GCF_000001405.40",
        "will print the link to the genome browser:\n\thttps://genome.ucsc.edu/cgi-bin/hgTracks?db=hg38\n",
        "Note the discussion for the 'findGenome' function in the help doc:",
        "\thttps://genome.ucsc.edu/goldenPath/help/api.html",
    )
    for line in usageLines:
        print(line, file=sys.stderr)


def main():
    """Run the search for the single command-line argument and print links."""
    if len(sys.argv) != 2:
        printUsage()
        sys.exit(1)

    searchTerm = sys.argv[1]
    print(f"# given search term: '{searchTerm}'", file=sys.stderr)

    ### the search term given with the 'q=searchTerm' can be any word that
    ### may match something in the assembly identifiers or version strings.
    ### see also: the 'findGenome' endpoint discussion in:
    ###    https://genome.ucsc.edu/goldenPath/help/api.html
    ### percent-encode the term so characters such as space, ';', '&', '#'
    ### and '+' reach the server as part of the query, not as URL syntax
    encodedTerm = quote(searchTerm, safe="")
    url = (
        "https://api.genome.ucsc.edu/findGenome"
        f"?browser=mustExist;maxItemsOutput={MAX_ITEMS_OUT};q={encodedTerm}"
    )
    keys = fetchDataKeys(url)

    ### search term may match to multiple browsers.  The returned results
    ### have a hierarchical 'order' built into them.  The most 'important'
    ### ones will be first.  Use maxItemsOutput=1 to return only the single
    ### top priority result.
    for key in keys:
        print(f"https://genome.ucsc.edu/cgi-bin/hgTracks?db={key}")


#########################################################################
if __name__ == "__main__":
    main()