# Assumptions built into this code:
# - urlopen never fails (network and HTTP errors are not handled)
# - each chemicalName element sits on a single line, with no markup between its tags
def get_pdb_ligands(code):
    """Return list of ligand names for given PDB code.

    Requests the ligandInfo document for ``code`` and scans the response
    one line at a time, letting ``extract_ligand`` collect the text of each
    ``<chemicalName>`` element it finds.

    Args:
        code: PDB structure identifier (e.g. ``"4HHB"``); it is interpolated
            into the request URL as-is.

    Returns:
        A list of ligand name strings in the order they appear in the
        response. The list is empty when no chemical name is found.

    Errors raised by ``urlopen`` (network or HTTP failures) are not caught
    and propagate to the caller.
    """
    import io
    import urllib.request

    url = "http://www.rcsb.org/pdb/rest/ligandInfo?structureId=%s" % code
    ligands = []
    with urllib.request.urlopen(url) as response:
        # Decode with the charset the server declares, falling back to
        # UTF-8, instead of the platform's locale encoding; otherwise
        # non-ASCII names could be mis-decoded or fail to decode.
        charset = response.headers.get_content_charset() or "utf-8"
        with io.TextIOWrapper(response, encoding=charset) as text:
            for line in text:
                extract_ligand(line, ligands)
    return ligands
def extract_ligand(line, ligand_list):
    """Extract ligand name if given line contains a chemical name.

    Looks for the first ``<chemicalName>`` opening tag on ``line`` and the
    first ``</chemicalName>`` closing tag that follows it. When both are
    present, the text between them, stripped of surrounding whitespace, is
    appended to ``ligand_list``. Otherwise ``ligand_list`` is left untouched.

    Args:
        line: One line of text to scan.
        ligand_list: List that is mutated in place; the extracted name is
            appended to it.

    Returns:
        None. The result is delivered by appending to ``ligand_list``.
    """
    start_tag = "<chemicalName>"
    end_tag = "</chemicalName>"

    open_at = line.find(start_tag)
    if open_at < 0:
        return

    name_start = open_at + len(start_tag)
    # Search for the closing tag only after the opening tag; a stray closing
    # tag earlier on the line would otherwise yield an empty name.
    name_end = line.find(end_tag, name_start)
    if name_end < 0:
        return

    ligand_list.append(line[name_start:name_end].strip())
# Demo: print the ligand names found for two sample PDB entries.
for pdb_code in ("4HHB", "3FX2"):
    print(get_pdb_ligands(pdb_code))