#!/usr/bin/python3

import requests, sys, os, json

# RCSB data-API GraphQL endpoint, ending in an open `query=` parameter; the
# query text is appended directly to this string when a request is made.
rcsburl='https://data.rcsb.org/graphql?query='

# The GraphQL query is split in two so that a comma-separated list of quoted
# entry IDs can be spliced in between: queryA + ids + queryB.

# First half: opens the query and the `entry_ids` list argument.
queryA = '''
{
  entries(entry_ids: [
'''

# Second half: closes the `entry_ids` list and names the fields requested for
# every entry (entry-level identifiers and resolution, plus per-polymer-entity
# sequence, chain instances, cluster membership, weight and identifiers).
queryB = '''
]) {
    rcsb_id
    rcsb_entry_container_identifiers {
      entry_id
    }
    rcsb_entry_info {
      resolution_combined
    }
    polymer_entities {
      entity_poly {
        pdbx_seq_one_letter_code_can
        rcsb_entity_polymer_type
        rcsb_sample_sequence_length
        type
      }
      polymer_entity_instances {
        rcsb_polymer_entity_instance_container_identifiers {
          asym_id
          auth_asym_id
        }
      }
      rcsb_cluster_membership {
        cluster_id
        identity
      }
      rcsb_polymer_entity {
        formula_weight
      }
      rcsb_polymer_entity_container_identifiers {
        auth_asym_ids
        entity_id
        entry_id
        reference_sequence_identifiers {
          database_accession
          database_name
        }
      }
    }
  }
}
'''

# Decoded JSON responses, keyed by the index (into pdbids) of the first ID of
# the batch that produced them.
outjson = {}

with open('rcsb_all_DNA+RNA_within_3.5A_xray_with_data.txt_short', 'r') as reader:
  # One ID per line.  Strip whitespace and skip blank lines so that no empty
  # "" ID is ever sent to the server.
  pdbids = [line.strip() for line in reader if line.strip()]

numids = len(pdbids)
startidx = 0
stride = 100  # number of entry IDs sent per request

# A single range() loop covers both the full batches and the shorter final
# one: slicing past the end of the list simply yields the remaining IDs.
for idx in range(startidx, numids, stride):
  ids = ','.join('"' + x + '"' for x in pdbids[idx:idx + stride])
  # Without a timeout a stalled connection would block the script forever.
  r = requests.get(rcsburl + queryA + ids + queryB, timeout=60)
  # Fail here with the HTTP status rather than later with an opaque JSON
  # decoding or key error on an error page.
  r.raise_for_status()
  outjson[idx] = r.json()


# For every polymer entity of every fetched entry, print its polymer type on a
# line of its own, then one line per chain instance holding: lower-cased entry
# ID, asym_id, canonical one-letter sequence and the cluster ID at identity 90
# ("N/A" for non-protein polymers and for proteins without such a cluster).
for batch in outjson:
  for entry in outjson[batch]["data"]["entries"]:
    entry_id = entry["rcsb_id"].lower()
    # `or []` guards against the field coming back as null.
    for polymer in entry["polymer_entities"] or []:
      entity_poly = polymer["entity_poly"]
      polymer_type = entity_poly["rcsb_entity_polymer_type"]
      # NOTE(review): looks like a debugging trace mixed into the data
      # output -- confirm whether it should stay.
      print(polymer_type)

      cluster_id = "N/A"
      if polymer_type == "Protein":
        for member in polymer["rcsb_cluster_membership"] or []:
          if member["identity"] == 90:
            cluster_id = str(member["cluster_id"])

      # The instance loop used to sit in the `else` branch of the Protein
      # test, so the cluster ID looked up above was never printed and protein
      # chains were skipped entirely.  Instances are now listed for every
      # polymer type.
      for instance in polymer["polymer_entity_instances"] or []:
        identifiers = instance["rcsb_polymer_entity_instance_container_identifiers"]
        print(entry_id, identifiers["asym_id"], entity_poly["pdbx_seq_one_letter_code_can"], cluster_id)
