In [2]:
from langchain.chains import create_extraction_chain
from langchain.chat_models import ChatOpenAI
import os
# Keep a key that is already exported in the environment; the placeholder below is
# only a fallback, so running this cell never overwrites a real key with it.
os.environ.setdefault('OPENAI_API_KEY', "[Your API key]")

# Description of one extracted record: a "properties" mapping of field name to
# JSON type, plus a "required" list that names every one of those fields.
schema = {
    "properties": {
        field_name: {"type": json_type}
        for field_name, json_type in (
            ("peptide_sequence", "string"),
            ("self-assembly_phase", "string"),
            ("N-terminal_modification", "string"),
            ("C-terminal_modification", "string"),
            ("Non-terminal_modification", "string"),
            ("peptide_conjugate_mixture", "string"),
            ("conjugate_partner", "string"),
            ("solution", "string"),
            ("PH", "string"),
            ("temperature(celsius)", "integer"),
            ("peptide_concentration(mg/ml)", "string"),
            ("heating_cooling", "boolean"),
            ("incubation_period(min)", "integer"),
        )
    },
    "required": [
        "peptide_sequence",
        "solution",
        "self-assembly_phase",
        "N-terminal_modification",
        "C-terminal_modification",
        "Non-terminal_modification",
        "peptide_conjugate_mixture",
        "conjugate_partner",
        "PH",
        "temperature(celsius)",
        "peptide_concentration(mg/ml)",
        "heating_cooling",
        "incubation_period(min)",
    ],
}
# Chat model used for extraction: gpt-3.5-turbo with the sampling temperature set to 0.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

# Extraction chain built from the schema above and the chat model.
chain = create_extraction_chain(schema=schema, llm=llm)

In [3]:
import json

data_path = "../../data/testset.jsonl"

# Load the test set: one JSON value per line.
# Whitespace-only lines (for example a stray blank line at the end of the file) are
# skipped, because json.loads raises JSONDecodeError on an empty string.
with open(data_path, 'r', encoding='utf-8') as f:
    testset = [json.loads(line) for line in f if line.strip()]

In [4]:
# Preview only the first record; echoing the whole list floods the cell output.
testset[:1]

[{'messages': [{'role': 'system',
    'content': 'polypeptide self-assembly literature information extraction'},
   {'role': 'user',
    'content': 'Self-assembly of peptides and amyloid fibrils offers an appealing approach for \ncreating chiral nanostructures, which has promising applications in the fields \nof biology and materials science. Although numerous self-assembled chiral \nmaterials have been designed, the precise control of their twisting tendency and \ntheir handedness is still a challenge. Herein, we report the self-assembly of \nchiral nanostructures with precisely tailored architectures by changing the \namino acid sequences of the peptides. We designed a series of self-assembling \ntripeptides bearing different l-amino acid sequences. The peptide with \nl-Phe-l-Phe sequence preferred to self-assemble into left-handed nanohelices, \nwhile with l-Phe-l-Trp right-handed nanohelices would be formed. Moreover, the \ndiameter of the self-assembled nanohelices could be tailor

In [69]:
# Three records for the first test example, written out as literals.
example1 = [
    {
        'peptide_sequence': 'Fmoc-FWH',
        'self-assembly_phase': 'chiral nanostructures',
        'N-terminal_modification': 'Fmoc',
        'C-terminal_modification': '-',
        'Non-terminal_modification': '-',
        'peptide_conjugate_mixture': '-',
        'conjugate_partner': '-',
        'solution': 'aqueous',
        'PH': '6',
        'temperature(celsius)': 25,
        'peptide_concentration(mg/ml)': '2 mM',
        'heating_cooling': False,
        'incubation_period(min)': 1440,
    },
    {
        'peptide_sequence': 'Fmoc-FWR',
        'self-assembly_phase': 'chiral nanostructures',
        'N-terminal_modification': 'Fmoc',
        'C-terminal_modification': '-',
        'Non-terminal_modification': '-',
        'peptide_conjugate_mixture': '-',
        'conjugate_partner': '-',
        'solution': '5:95 methanol/H2O',
        'PH': '6',
        'temperature(celsius)': None,
        'peptide_concentration(mg/ml)': '100 mM',
        'heating_cooling': False,
        'incubation_period(min)': None,
    },
    {
        'peptide_sequence': 'Fmoc-FFR',
        'self-assembly_phase': 'chiral nanostructures',
        'N-terminal_modification': 'Fmoc',
        'C-terminal_modification': '-',
        'Non-terminal_modification': '-',
        'peptide_conjugate_mixture': '-',
        'conjugate_partner': '-',
        'solution': '5:95 methanol/H2O',
        'PH': '6',
        'temperature(celsius)': None,
        'peptide_concentration(mg/ml)': '100 mM',
        'heating_cooling': False,
        'incubation_period(min)': None,
    },
]

# Mode 'w' truncates the file, so this cell starts the predictions file afresh with
# the list serialised as a single JSON line.
with open('prediction_pretrain.jsonl', 'w') as out_file:
    json.dump(example1, out_file)
    out_file.write("\n")
    
    

In [100]:
# Two records for test example 23, written out as literals.
example23 = [
    {
        'peptide_sequence': 'Fmoc-Phe-Phe-Asp',
        'self-assembly_phase': 'nanofibers',
        'N-terminal_modification': 'N-(9-Fluorenylmethoxycarbonyl)-protected',
        'C-terminal_modification': 'OH',
        'Non-terminal_modification': '',
        'peptide_conjugate_mixture': '',
        'conjugate_partner': '',
        'solution': 'ultra-pure H2O',
        'PH': '8',
        'temperature(celsius)': 25,
        'peptide_concentration(mg/ml)': '8 mm',
        'heating_cooling': False,
        'incubation_period(min)': 30,
    },
    {
        'peptide_sequence': 'Fmoc-Phe-Phe-Asp',
        'self-assembly_phase': 'nanoparticles',
        'N-terminal_modification': 'N-(9-Fluorenylmethoxycarbonyl)-protected',
        'C-terminal_modification': 'OH',
        'Non-terminal_modification': '',
        'peptide_conjugate_mixture': '',
        'conjugate_partner': '',
        'solution': 'ultra-pure H2O',
        'PH': '7',
        'temperature(celsius)': 55,
        'peptide_concentration(mg/ml)': '8 mm',
        'heating_cooling': True,
        'incubation_period(min)': 120,
    },
]

# Append mode: every run of this cell adds one more JSON line to the predictions
# file, so re-running it produces a duplicate entry.
with open('prediction_pretrain.jsonl', 'a+') as out_file:
    json.dump(example23, out_file)
    out_file.write("\n")

In [101]:
import json

data_path = "prediction_pretrain.jsonl"

# Load the saved predictions back: one JSON value per line.
# Whitespace-only lines are skipped, because json.loads raises JSONDecodeError on an
# empty string.
with open(data_path, 'r', encoding='utf-8') as f:
    pred = [json.loads(line) for line in f if line.strip()]
# Number of prediction entries that were read.
print(len(pred))

23


# Testing examples 1

In [None]:

# Run the extraction chain on the content of messages[1] of testset[0].
source_text = testset[0]['messages'][1]["content"]
result = chain.run(source_text)

In [11]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then each element of the JSON stored in messages[2]
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[0]['messages'][2]['content'])
for entry in reference:
    print(entry)

{'peptide_sequence': 'Fmoc-FWH', 'self-assembly_phase': 'chiral nanostructures', 'N-terminal_modification': 'Fmoc', 'C-terminal_modification': '-', 'Non-terminal_modification': '-', 'peptide_conjugate_mixture': '-', 'conjugate_partner': '-', 'solution': 'aqueous', 'PH': '6', 'temperature(celsius)': 25, 'peptide_concentration(mg/ml)': '2 mM', 'heating_cooling': False, 'incubation_period(min)': 1440}
{'peptide_sequence': 'Fmoc-FWR', 'self-assembly_phase': 'chiral nanostructures', 'N-terminal_modification': 'Fmoc', 'C-terminal_modification': '-', 'Non-terminal_modification': '-', 'peptide_conjugate_mixture': '-', 'conjugate_partner': '-', 'solution': '5:95 methanol/H2O', 'PH': '6', 'temperature(celsius)': None, 'peptide_concentration(mg/ml)': '100 mM', 'heating_cooling': False, 'incubation_period(min)': None}
{'peptide_sequence': 'Fmoc-FFR', 'self-assembly_phase': 'chiral nanostructures', 'N-terminal_modification': 'Fmoc', 'C-terminal_modification': '-', 'Non-terminal_modification': '-', 

# Testing examples 2

In [13]:

# Run the extraction chain on the content of messages[1] of testset[1].
source_text = testset[1]['messages'][1]["content"]
result = chain.run(source_text)

In [14]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then each element of the JSON stored in messages[2]
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[1]['messages'][2]['content'])
for entry in reference:
    print(entry)

{'peptide_sequence': 'Fmocdipeptides', 'self-assembly_phase': 'gelation', 'N-terminal_modification': 'Fmoc', 'C-terminal_modification': 'not specified', 'Non-terminal_modification': 'not specified', 'peptide_conjugate_mixture': 'not specified', 'conjugate_partner': 'not specified', 'solution': 'HPLC grade water', 'PH': '10.5', 'temperature(celsius)': 75, 'peptide_concentration(mg/ml)': 'not specified', 'heating_cooling': True, 'incubation_period(min)': 1}
{'peptide_sequence': 'Fmocdipeptide', 'self-assembly_phase': 'dissolution', 'N-terminal_modification': 'Fmoc', 'C-terminal_modification': 'not specified', 'Non-terminal_modification': 'not specified', 'peptide_conjugate_mixture': 'not specified', 'conjugate_partner': 'not specified', 'solution': 'HPLC grade water', 'PH': '10.5', 'temperature(celsius)': 75, 'peptide_concentration(mg/ml)': 'not specified', 'heating_cooling': True, 'incubation_period(min)': 1}
#######################
#######################
{'peptide_sequence': 'FG', 'se

# Testing examples 3

In [15]:

# Run the extraction chain on the content of messages[1] of testset[2].
source_text = testset[2]['messages'][1]["content"]
result = chain.run(source_text)

In [20]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then the parsed JSON from messages[2] printed as one object
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[2]['messages'][2]['content'])
print(reference)

[{'peptide_sequence': 'naphthalene-diphenylalanine', 'self-assembly_phase': 'worm-like micelles', 'N-terminal_modification': '', 'C-terminal_modification': '', 'Non-terminal_modification': '', 'peptide_conjugate_mixture': '', 'conjugate_partner': '', 'solution': 'DMSO', 'PH': 'high', 'temperature(celsius)': None, 'peptide_concentration(mg/ml)': '', 'heating_cooling': False, 'incubation_period(min)': None}]
#######################
#######################
{'peptide_sequence': 'FA', 'self-assembly_phase': 'nanosphere', 'N-terminal_modification': 'Napthalene', 'C-terminal_modification': 'Free', 'Non-terminal_modification': 'nan', 'peptide_conjugate_mixture': 'Peptide', 'conjugate_partner': 'nan', 'solution': '100 H2O', 'peptide_concentration(mg/ml)': '5.0', 'PH': '11.7', 'temperature(celsius)': '25.0', 'heating_cooling': 'No', 'incubation_period(min)': '90.0'}


In [17]:
# Display the unparsed content string of messages[2] for testset[2].
reference_raw = testset[2]['messages'][2]['content']
reference_raw

'{"peptide_sequence": "FA", "self-assembly_phase": "nanosphere", "N-terminal_modification": "Napthalene", "C-terminal_modification": "Free", "Non-terminal_modification": "nan", "peptide_conjugate_mixture": "Peptide", "conjugate_partner": "nan", "solution": "100 H2O", "peptide_concentration(mg/ml)": "5.0", "PH": "11.7", "temperature(celsius)": "25.0", "heating_cooling": "No", "incubation_period(min)": "90.0"}'

# Testing examples 4

In [21]:

# Run the extraction chain on the content of messages[1] of testset[3].
source_text = testset[3]['messages'][1]["content"]
result = chain.run(source_text)

In [23]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then each element of the JSON stored in messages[2]
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[3]['messages'][2]['content'])
for entry in reference:
    print(entry)

[{'peptide_sequence': 'diphenylalanine', 'self-assembly_phase': 'nanotubes', 'N-terminal_modification': '', 'C-terminal_modification': '', 'Non-terminal_modification': '', 'peptide_conjugate_mixture': '', 'conjugate_partner': '', 'solution': '1,1,1,3,3,3-HFP', 'PH': '', 'temperature(celsius)': None, 'peptide_concentration(mg/ml)': '50 or 100', 'heating_cooling': False, 'incubation_period(min)': None}]
#######################
#######################
{'peptide_sequence': 'FF', 'self-assembly_phase': 'tube', 'N-terminal_modification': 'Free', 'C-terminal_modification': 'Free', 'Non-terminal_modification': 'nan', 'peptide_conjugate_mixture': 'Peptide', 'conjugate_partner': 'nan', 'solution': '15/85 HFP/H2O', 'peptide_concentration(mg/ml)': '10.0', 'PH': '4.3', 'temperature(celsius)': '25.0', 'heating_cooling': 'No', 'incubation_period(min)': 'nan'}
{'peptide_sequence': 'FF', 'self-assembly_phase': 'tube', 'N-terminal_modification': 'Boc', 'C-terminal_modification': 'Free', 'Non-terminal_mo

# Testing examples 5

In [24]:
# Run the extraction chain on the content of messages[1] of testset[4].
source_text = testset[4]['messages'][1]["content"]
result = chain.run(source_text)

In [26]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then each element of the JSON stored in messages[2]
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[4]['messages'][2]['content'])
for entry in reference:
    print(entry)

{'peptide_sequence': 'VFF', 'self-assembly_phase': 'self-assembled hydrogels', 'N-terminal_modification': 'L to D', 'C-terminal_modification': '', 'Non-terminal_modification': '', 'peptide_conjugate_mixture': '', 'conjugate_partner': '', 'solution': 'phosphate buffer', 'PH': '7.4', 'temperature(celsius)': None, 'peptide_concentration(mg/ml)': '', 'heating_cooling': False, 'incubation_period(min)': None}
{'peptide_sequence': 'FFV', 'self-assembly_phase': 'self-assembled hydrogels', 'N-terminal_modification': 'L to D', 'C-terminal_modification': '', 'Non-terminal_modification': '', 'peptide_conjugate_mixture': '', 'conjugate_partner': '', 'solution': 'phosphate buffer', 'PH': '7.4', 'temperature(celsius)': None, 'peptide_concentration(mg/ml)': '', 'heating_cooling': False, 'incubation_period(min)': None}
#######################
#######################
{'peptide_sequence': 'vFF', 'self-assembly_phase': 'hydrogel', 'N-terminal_modification': 'Free', 'C-terminal_modification': 'Free', 'Non-

# Testing examples 6

In [27]:
# Run the extraction chain on the content of messages[1] of testset[5].
source_text = testset[5]['messages'][1]["content"]
result = chain.run(source_text)

In [28]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then each element of the JSON stored in messages[2]
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[5]['messages'][2]['content'])
for entry in reference:
    print(entry)

[{'peptide_sequence': 'Boc-diphenylalanine', 'self-assembly_phase': 'nucleation process', 'N-terminal_modification': 'Boc', 'C-terminal_modification': 'COOH', 'Non-terminal_modification': '', 'peptide_conjugate_mixture': '', 'conjugate_partner': '', 'solution': 'ethanol', 'PH': '', 'temperature(celsius)': 20, 'peptide_concentration(mg/ml)': '7.5', 'heating_cooling': False, 'incubation_period(min)': 0}]
#######################
#######################
{'peptide_sequence': 'FF', 'self-assembly_phase': 'fiber', 'N-terminal_modification': 'Boc', 'C-terminal_modification': 'Free', 'Non-terminal_modification': 'nan', 'peptide_conjugate_mixture': 'Peptide', 'conjugate_partner': 'nan', 'solution': '10/90 Ethanol/H2O', 'peptide_concentration(mg/ml)': '0.2', 'PH': '7.0', 'temperature(celsius)': '25.0', 'heating_cooling': 'No', 'incubation_period(min)': '35.0'}
{'peptide_sequence': 'FF', 'self-assembly_phase': 'fiber', 'N-terminal_modification': 'Boc', 'C-terminal_modification': 'Free', 'Non-termi

# Testing examples 7

In [29]:
# Run the extraction chain on the content of messages[1] of testset[6].
source_text = testset[6]['messages'][1]["content"]
result = chain.run(source_text)

In [31]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then the parsed JSON from messages[2] printed as one object
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[6]['messages'][2]['content'])
print(reference)

{'peptide_sequence': 'diphenylalanine', 'self-assembly_phase': '1D fibrils', 'N-terminal_modification': '', 'C-terminal_modification': '', 'Non-terminal_modification': '', 'peptide_conjugate_mixture': '', 'conjugate_partner': '', 'solution': 'organic-aqueous', 'PH': '', 'temperature(celsius)': 50, 'peptide_concentration(mg/ml)': '10', 'heating_cooling': True, 'incubation_period(min)': 10}
{'peptide_sequence': 'diphenylalanine', 'self-assembly_phase': '1D nanostructures', 'N-terminal_modification': '', 'C-terminal_modification': '', 'Non-terminal_modification': '', 'peptide_conjugate_mixture': 'curcumin', 'conjugate_partner': '', 'solution': 'organic', 'PH': '', 'temperature(celsius)': None, 'peptide_concentration(mg/ml)': '10-6', 'heating_cooling': False, 'incubation_period(min)': None}
{'peptide_sequence': 'bovine serum albumin', 'self-assembly_phase': 'thin film', 'N-terminal_modification': '', 'C-terminal_modification': '', 'Non-terminal_modification': '', 'peptide_conjugate_mixture

# Testing examples 8

In [32]:
# Run the extraction chain on the content of messages[1] of testset[7].
source_text = testset[7]['messages'][1]["content"]
result = chain.run(source_text)

In [35]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then each element of the JSON stored in messages[2]
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[7]['messages'][2]['content'])
for entry in reference:
    print(entry)

[{'peptide_sequence': 'dipeptide-conjugates', 'self-assembly_phase': 'hydrogelators', 'N-terminal_modification': '', 'C-terminal_modification': '', 'Non-terminal_modification': '', 'peptide_conjugate_mixture': 'naphthalene-dipeptides', 'conjugate_partner': '', 'solution': 'water', 'PH': '10.7', 'temperature(celsius)': 25, 'peptide_concentration(mg/ml)': '0.5', 'heating_cooling': False, 'incubation_period(min)': 120}]
#######################
#######################
{'peptide_sequence': 'AA ', 'self-assembly_phase': 'hydrogel', 'N-terminal_modification': 'Napthalene', 'C-terminal_modification': 'Free', 'Non-terminal_modification': 'nan', 'peptide_conjugate_mixture': 'Conjugate', 'conjugate_partner': 'Napthalene', 'solution': '100 H2O', 'peptide_concentration(mg/ml)': '5.0', 'PH': '3.4', 'temperature(celsius)': '25.0', 'heating_cooling': 'No', 'incubation_period(min)': '1440.0'}
{'peptide_sequence': 'AA', 'self-assembly_phase': 'hydrogel', 'N-terminal_modification': 'Napthalene', 'C-termi

# Testing examples 9

In [36]:
# Run the extraction chain on the content of messages[1] of testset[8].
source_text = testset[8]['messages'][1]["content"]
result = chain.run(source_text)

In [37]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then each element of the JSON stored in messages[2]
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[8]['messages'][2]['content'])
for entry in reference:
    print(entry)

[{'peptide_sequence': 'dipeptides', 'self-assembly_phase': 'self-assembling', 'C-terminal_modification': 'glutamic acid', 'solution': 'water-soluble', 'PH': '2-12', 'temperature(celsius)': None, 'peptide_concentration(mg/ml)': None, 'heating_cooling': None, 'incubation_period(min)': None}]
#######################
#######################
{'peptide_sequence': 'VG', 'self-assembly_phase': 'nanosphere', 'N-terminal_modification': 'Free', 'C-terminal_modification': 'Free', 'Non-terminal_modification': 'nan', 'peptide_conjugate_mixture': 'Peptide', 'conjugate_partner': 'nan', 'solution': '100 H2O', 'peptide_concentration(mg/ml)': '10.45', 'PH': '6.96', 'temperature(celsius)': '37.0', 'heating_cooling': 'No', 'incubation_period(min)': '2160.0'}
{'peptide_sequence': 'LG', 'self-assembly_phase': 'nanosphere', 'N-terminal_modification': 'Free', 'C-terminal_modification': 'Free', 'Non-terminal_modification': 'nan', 'peptide_conjugate_mixture': 'Peptide', 'conjugate_partner': 'nan', 'solution': '1

# Testing examples 10

In [38]:
# Run the extraction chain on the content of messages[1] of testset[9].
source_text = testset[9]['messages'][1]["content"]
result = chain.run(source_text)

In [39]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then each element of the JSON stored in messages[2]
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[9]['messages'][2]['content'])
for entry in reference:
    print(entry)

[{'peptide_sequence': 'amphiphilic tripeptides', 'self-assembly_phase': 'nanostructure', 'N-terminal_modification': '', 'C-terminal_modification': '', 'Non-terminal_modification': '', 'peptide_conjugate_mixture': '', 'conjugate_partner': '', 'solution': '', 'PH': '', 'temperature(celsius)': None, 'peptide_concentration(mg/ml)': '', 'heating_cooling': False, 'incubation_period(min)': None}]
#######################
#######################
{'peptide_sequence': 'FGD', 'self-assembly_phase': 'none', 'N-terminal_modification': 'Free', 'C-terminal_modification': 'Free', 'Non-terminal_modification': 'nan', 'peptide_conjugate_mixture': 'Peptide', 'conjugate_partner': 'nan', 'solution': '100 H2O', 'peptide_concentration(mg/ml)': '6.75', 'PH': '7.0', 'temperature(celsius)': '25.0', 'heating_cooling': 'No', 'incubation_period(min)': '1440.0'}
{'peptide_sequence': 'FAD', 'self-assembly_phase': 'none', 'N-terminal_modification': 'Free', 'C-terminal_modification': 'Free', 'Non-terminal_modification':

# Testing examples 11

In [40]:
# Run the extraction chain on the content of messages[1] of testset[10].
source_text = testset[10]['messages'][1]["content"]
result = chain.run(source_text)

In [41]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then each element of the JSON stored in messages[2]
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[10]['messages'][2]['content'])
for entry in reference:
    print(entry)

[{'peptide_sequence': 'ditryptophan dipeptide (WW)', 'self-assembly_phase': 'fibrils', 'N-terminal_modification': '', 'C-terminal_modification': '', 'Non-terminal_modification': 'amphiphilic, rod-coil', 'peptide_conjugate_mixture': 'peptide-DNA hybrid', 'conjugate_partner': 'synthetic single stranded nucleotide sequence', 'solution': 'dilute aqueous solution', 'PH': 'not specified', 'temperature(celsius)': 25, 'peptide_concentration(mg/ml)': 'not specified', 'heating_cooling': False, 'incubation_period(min)': 1440}]
#######################
#######################
{'peptide_sequence': 'WW', 'self-assembly_phase': 'nanosphere', 'N-terminal_modification': 'Free', 'C-terminal_modification': 'Free', 'Non-terminal_modification': 'nan', 'peptide_conjugate_mixture': 'Peptide', 'conjugate_partner': 'nan', 'solution': '2/98 HFP/H2O', 'peptide_concentration(mg/ml)': '0.35', 'PH': '7.5', 'temperature(celsius)': '37.0', 'heating_cooling': 'No', 'incubation_period(min)': '1440.0'}
{'peptide_sequence

# Testing examples 12

In [42]:
# Run the extraction chain on the content of messages[1] of testset[11].
source_text = testset[11]['messages'][1]["content"]
result = chain.run(source_text)

In [43]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then each element of the JSON stored in messages[2]
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[11]['messages'][2]['content'])
for entry in reference:
    print(entry)

{'peptide_sequence': 'L-diphenylalanine peptide', 'self-assembly_phase': 'nanotubes', 'peptide_concentration(mg/ml)': '100 mg/ml', 'heating_cooling': True, 'incubation_period(min)': 90}
{'peptide_sequence': 'L-dileucine peptide', 'self-assembly_phase': 'nanotubes', 'peptide_concentration(mg/ml)': '100 mg/ml', 'heating_cooling': True, 'incubation_period(min)': 180}
{'peptide_sequence': 'LL nanotubes', 'self-assembly_phase': 'nanotubes', 'solution': 'water', 'peptide_concentration(mg/ml)': '20 mg/ml', 'heating_cooling': True, 'incubation_period(min)': 60}
{'peptide_sequence': 'FF nanotubes', 'self-assembly_phase': 'nanotubes', 'solution': 'water', 'peptide_concentration(mg/ml)': '20 mg/ml', 'heating_cooling': True, 'incubation_period(min)': 60}
{'peptide_sequence': 'FF nanotubes', 'self-assembly_phase': 'nanotubes', 'solution': 'water', 'peptide_concentration(mg/ml)': '12.6 mg/ml', 'heating_cooling': True, 'incubation_period(min)': 60}
{'peptide_sequence': 'FF nanotubes', 'self-assembly_

# Testing examples 13

In [44]:
# Run the extraction chain on the content of messages[1] of testset[12].
source_text = testset[12]['messages'][1]["content"]
result = chain.run(source_text)

In [45]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then each element of the JSON stored in messages[2]
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[12]['messages'][2]['content'])
for entry in reference:
    print(entry)

[{'peptide_sequence': 'diphenylalanine', 'self-assembly_phase': 'efficiently self-assembles into discrete, well-ordered nanotubes', 'N-terminal_modification': '', 'C-terminal_modification': '', 'Non-terminal_modification': '', 'peptide_conjugate_mixture': '', 'conjugate_partner': '', 'solution': 'aqueous', 'PH': '', 'temperature(celsius)': 90, 'peptide_concentration(mg/ml)': '100', 'heating_cooling': True, 'incubation_period(min)': 60}]
#######################
#######################
{'peptide_sequence': 'FF', 'self-assembly_phase': 'tube', 'N-terminal_modification': 'Free', 'C-terminal_modification': 'Free', 'Non-terminal_modification': 'nan', 'peptide_conjugate_mixture': 'Peptide', 'conjugate_partner': 'nan', 'solution': '2/98 HFP/H2O', 'peptide_concentration(mg/ml)': '2.0', 'PH': '5.2', 'temperature(celsius)': '25.0', 'heating_cooling': 'No', 'incubation_period(min)': '5.0'}
{'peptide_sequence': 'FF', 'self-assembly_phase': 'tube', 'N-terminal_modification': 'Free', 'C-terminal_modi

# Testing examples 14

In [46]:
# Run the extraction chain on the content of messages[1] of testset[13].
source_text = testset[13]['messages'][1]["content"]
result = chain.run(source_text)

In [48]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then each element of the JSON stored in messages[2]
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[13]['messages'][2]['content'])
for entry in reference:
    print(entry)

{'peptide_sequence': 'Fmoc–Phe–Phe', 'self-assembly_phase': 'gelation', 'N-terminal_modification': 'Fmoc', 'C-terminal_modification': '-', 'Non-terminal_modification': '-', 'peptide_conjugate_mixture': '-', 'conjugate_partner': '-', 'solution': 'Milli-Q-puriﬁed water', 'PH': '-', 'temperature(celsius)': 12, 'peptide_concentration(mg/ml)': '15 mM', 'heating_cooling': False, 'incubation_period(min)': 432}
{'peptide_sequence': 'Fmoc–Phe–Phe', 'self-assembly_phase': 'gelation', 'N-terminal_modification': 'Fmoc', 'C-terminal_modification': '-', 'Non-terminal_modification': '-', 'peptide_conjugate_mixture': '-', 'conjugate_partner': '-', 'solution': 'Milli-Q-puriﬁed water', 'PH': '-', 'temperature(celsius)': 25, 'peptide_concentration(mg/ml)': '15 mM', 'heating_cooling': False, 'incubation_period(min)': 432}
{'peptide_sequence': 'Fmoc–Phe–Phe', 'self-assembly_phase': 'gelation', 'N-terminal_modification': 'Fmoc', 'C-terminal_modification': '-', 'Non-terminal_modification': '-', 'peptide_conj

# Testing examples 15

In [49]:
# Run the extraction chain on the content of messages[1] of testset[14].
source_text = testset[14]['messages'][1]["content"]
result = chain.run(source_text)

In [50]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then each element of the JSON stored in messages[2]
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[14]['messages'][2]['content'])
for entry in reference:
    print(entry)

{'peptide_sequence': 'NapFF', 'self-assembly_phase': 'self-assembled', 'N-terminal_modification': 'Fmoc', 'C-terminal_modification': 'carboxylic acid', 'Non-terminal_modification': 'Boc', 'peptide_conjugate_mixture': 'NapFFKK, NapFFFKK, NapFFOO, NapFFK′K′', 'conjugate_partner': 'naphthalene', 'solution': 'deionized water', 'PH': '9', 'temperature(celsius)': 25, 'peptide_concentration(mg/ml)': '2.0', 'heating_cooling': False, 'incubation_period(min)': 1440}
{'peptide_sequence': 'NapFFKK', 'self-assembly_phase': 'self-assembled', 'N-terminal_modification': 'Fmoc', 'C-terminal_modification': 'carboxylic acid', 'Non-terminal_modification': 'Boc', 'peptide_conjugate_mixture': 'NapFFKK, NapFFFKK, NapFFOO, NapFFK′K′', 'conjugate_partner': 'naphthalene', 'solution': 'deionized water', 'PH': '9', 'temperature(celsius)': 25, 'peptide_concentration(mg/ml)': '2.0', 'heating_cooling': False, 'incubation_period(min)': 1440}
{'peptide_sequence': 'NapFFFKK', 'self-assembly_phase': 'self-assembled', 'N

# Testing examples 16

In [51]:
# Run the extraction chain on the content of messages[1] of testset[15].
source_text = testset[15]['messages'][1]["content"]
result = chain.run(source_text)

In [52]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then each element of the JSON stored in messages[2]
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[15]['messages'][2]['content'])
for entry in reference:
    print(entry)

{'peptide_sequence': 'Fmoc-FWK', 'self-assembly_phase': 'chiral structures', 'N-terminal_modification': 'N-(9-fluorenylmethoxycarbonyl)', 'C-terminal_modification': 'zwitterionic lysine', 'Non-terminal_modification': 'aromatic phenylalanine-tryptophan dipeptide', 'peptide_conjugate_mixture': '', 'conjugate_partner': '', 'solution': 'aqueous', 'PH': '11.5', 'temperature(celsius)': 60, 'peptide_concentration(mg/ml)': '7', 'heating_cooling': True, 'incubation_period(min)': 144}
{'peptide_sequence': 'Fmoc-FWK-NH2', 'self-assembly_phase': 'nanofibers', 'N-terminal_modification': 'N-(9-fluorenylmethoxycarbonyl)', 'C-terminal_modification': 'zwitterionic lysine', 'Non-terminal_modification': 'aromatic phenylalanine-tryptophan dipeptide', 'peptide_conjugate_mixture': '', 'conjugate_partner': '', 'solution': 'aqueous', 'PH': 'not mentioned', 'temperature(celsius)': None, 'peptide_concentration(mg/ml)': 'not mentioned', 'heating_cooling': False, 'incubation_period(min)': None}
##################

# Testing examples 17

In [54]:
# Run the extraction chain on the content of messages[1] of testset[16].
source_text = testset[16]['messages'][1]["content"]
result = chain.run(source_text)

In [55]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then each element of the JSON stored in messages[2]
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[16]['messages'][2]['content'])
for entry in reference:
    print(entry)

{'peptide_sequence': 'FFK', 'N-terminal_modification': 'N terminal-protected', 'C-terminal_modification': 'OH', 'Non-terminal_modification': 'None', 'peptide_conjugate_mixture': 'None', 'conjugate_partner': 'None', 'solution': 'water/HFIP', 'PH': '5-6', 'temperature(celsius)': 25, 'peptide_concentration(mg/ml)': '1.0', 'heating_cooling': False, 'incubation_period(min)': 100}
{'peptide_sequence': 'FYK', 'N-terminal_modification': 'N terminal-protected', 'C-terminal_modification': 'OH', 'Non-terminal_modification': 'None', 'peptide_conjugate_mixture': 'None', 'conjugate_partner': 'None', 'solution': 'water/HFIP', 'PH': '5-6', 'temperature(celsius)': 25, 'peptide_concentration(mg/ml)': '5.0', 'heating_cooling': False, 'incubation_period(min)': 100}
{'peptide_sequence': 'YFK', 'N-terminal_modification': 'N terminal-protected', 'C-terminal_modification': 'OH', 'Non-terminal_modification': 'None', 'peptide_conjugate_mixture': 'None', 'conjugate_partner': 'None', 'solution': 'water/HFIP', 'PH

# Testing examples 18

In [56]:
# Run the extraction chain on the content of messages[1] of testset[17].
source_text = testset[17]['messages'][1]["content"]
result = chain.run(source_text)

In [57]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then each element of the JSON stored in messages[2]
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[17]['messages'][2]['content'])
for entry in reference:
    print(entry)

[{'peptide_sequence': 'ultrashort peptides', 'self-assembly_phase': 'hydrogelation', 'N-terminal_modification': '', 'C-terminal_modification': '', 'Non-terminal_modification': '', 'peptide_conjugate_mixture': '', 'conjugate_partner': '', 'solution': 'physiological pH', 'PH': '', 'temperature(celsius)': 25, 'peptide_concentration(mg/ml)': '1 mg/ml', 'heating_cooling': False, 'incubation_period(min)': 15}]
#######################
#######################
{'peptide_sequence': 'fFV', 'self-assembly_phase': 'fiber', 'N-terminal_modification': 'Free', 'C-terminal_modification': 'Free', 'Non-terminal_modification': 'nan', 'peptide_conjugate_mixture': 'Peptide', 'conjugate_partner': 'nan', 'solution': '100 PBS', 'peptide_concentration(mg/ml)': '7.0', 'PH': '7.4', 'temperature(celsius)': '25.0', 'heating_cooling': 'No', 'incubation_period(min)': 'nan'}
{'peptide_sequence': 'Ffv', 'self-assembly_phase': 'fiber', 'N-terminal_modification': 'Free', 'C-terminal_modification': 'Free', 'Non-terminal_m

# Testing examples 19

In [58]:
# Run the extraction chain on the content of messages[1] of testset[18].
source_text = testset[18]['messages'][1]["content"]
result = chain.run(source_text)

In [59]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then each element of the JSON stored in messages[2]
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[18]['messages'][2]['content'])
for entry in reference:
    print(entry)

{'peptide_sequence': 'Fmoc-FF', 'self-assembly_phase': 'hydrogel', 'N-terminal_modification': '', 'C-terminal_modification': '', 'Non-terminal_modification': '', 'peptide_conjugate_mixture': '', 'conjugate_partner': '', 'solution': 'DMSO', 'PH': '', 'temperature(celsius)': 25, 'peptide_concentration(mg/ml)': '100', 'heating_cooling': False, 'incubation_period(min)': 1}
{'peptide_sequence': 'Fmoc-(2-naphthyl)-L-alanine', 'self-assembly_phase': 'hydrogel', 'N-terminal_modification': '', 'C-terminal_modification': '', 'Non-terminal_modification': '', 'peptide_conjugate_mixture': '', 'conjugate_partner': '', 'solution': 'DMSO', 'PH': '', 'temperature(celsius)': 25, 'peptide_concentration(mg/ml)': '100', 'heating_cooling': False, 'incubation_period(min)': 1}
{'peptide_sequence': 'Fmoc-phenylalanine-proline', 'self-assembly_phase': 'hydrogel', 'N-terminal_modification': '', 'C-terminal_modification': '', 'Non-terminal_modification': '', 'peptide_conjugate_mixture': '', 'conjugate_partner': '

# Testing examples 20

In [60]:
# Run the extraction chain on the content of messages[1] of testset[19].
source_text = testset[19]['messages'][1]["content"]
result = chain.run(source_text)

In [61]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then each element of the JSON stored in messages[2]
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[19]['messages'][2]['content'])
for entry in reference:
    print(entry)

[{'peptide_sequence': '(D)LFF', 'self-assembly_phase': 'hydrogel', 'N-terminal_modification': '', 'C-terminal_modification': '', 'Non-terminal_modification': '', 'peptide_conjugate_mixture': '', 'conjugate_partner': '', 'solution': 'aqueous', 'PH': 'physiological', 'temperature(celsius)': 25, 'peptide_concentration(mg/ml)': 'not specified', 'heating_cooling': False, 'incubation_period(min)': 0}]
#######################
#######################
{'peptide_sequence': 'LFF', 'self-assembly_phase': 'nanosphere', 'N-terminal_modification': 'Free', 'C-terminal_modification': 'Free', 'Non-terminal_modification': 'nan', 'peptide_conjugate_mixture': 'Peptide', 'conjugate_partner': 'nan', 'solution': '100 PBS', 'peptide_concentration(mg/ml)': '10.0', 'PH': '7.4', 'temperature(celsius)': '25.0', 'heating_cooling': 'No', 'incubation_period(min)': '5.0'}
{'peptide_sequence': 'lFF', 'self-assembly_phase': 'hydrogel', 'N-terminal_modification': 'Free', 'C-terminal_modification': 'Free', 'Non-terminal_m

# Testing examples 21

In [62]:
# Run the extraction chain on the content of messages[1] of testset[20].
source_text = testset[20]['messages'][1]["content"]
result = chain.run(source_text)

In [63]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then each element of the JSON stored in messages[2]
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[20]['messages'][2]['content'])
for entry in reference:
    print(entry)

{'peptide_sequence': 'diphenylalanine', 'self-assembly_phase': 'nanostructures', 'N-terminal_modification': '', 'C-terminal_modification': '', 'Non-terminal_modification': '', 'peptide_conjugate_mixture': '', 'conjugate_partner': '', 'solution': '', 'PH': '', 'temperature(celsius)': 0, 'peptide_concentration(mg/ml)': '', 'heating_cooling': False, 'incubation_period(min)': 0}
{'peptide_sequence': 'tryptophan', 'self-assembly_phase': 'nanostructures', 'N-terminal_modification': '', 'C-terminal_modification': '', 'Non-terminal_modification': '', 'peptide_conjugate_mixture': '', 'conjugate_partner': '', 'solution': '', 'PH': '', 'temperature(celsius)': 0, 'peptide_concentration(mg/ml)': '', 'heating_cooling': False, 'incubation_period(min)': 0}
{'peptide_sequence': 'cyclo-dipeptides', 'self-assembly_phase': 'nanostructures', 'N-terminal_modification': '', 'C-terminal_modification': '', 'Non-terminal_modification': '', 'peptide_conjugate_mixture': '', 'conjugate_partner': '', 'solution': ''

# Testing examples 22

In [64]:
# Run the extraction chain on the content of messages[1] of testset[21].
source_text = testset[21]['messages'][1]["content"]
result = chain.run(source_text)

In [65]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then each element of the JSON stored in messages[2]
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[21]['messages'][2]['content'])
for entry in reference:
    print(entry)

[{'peptide_sequence': 'diphenylalanine', 'self-assembly_phase': 'ordered supramolecular structures', 'N-terminal_modification': '', 'C-terminal_modification': '', 'Non-terminal_modification': '', 'peptide_conjugate_mixture': '', 'conjugate_partner': '', 'solution': 'aqueous', 'PH': '', 'temperature(celsius)': 100, 'peptide_concentration(mg/ml)': '4.5 g/L', 'heating_cooling': True, 'incubation_period(min)': 30}]
#######################
#######################
{'peptide_sequence': 'FF', 'self-assembly_phase': 'tube', 'N-terminal_modification': 'Free', 'C-terminal_modification': 'Free', 'Non-terminal_modification': 'nan', 'peptide_conjugate_mixture': 'Peptide', 'conjugate_partner': 'nan', 'solution': '100 H2O', 'peptide_concentration(mg/ml)': '0.605', 'PH': '7.0', 'temperature(celsius)': '4.0', 'heating_cooling': 'No', 'incubation_period(min)': 'nan'}
{'peptide_sequence': 'FF', 'self-assembly_phase': 'tube', 'N-terminal_modification': 'Free', 'C-terminal_modification': 'Free', 'Non-termin

# Testing examples 23

In [66]:
# Run the extraction chain on the content of messages[1] of testset[22].
source_text = testset[22]['messages'][1]["content"]
result = chain.run(source_text)

In [67]:

# Extracted records: one per line when there are several, otherwise the result as a whole.
shown = result if len(result) > 1 else [result]
for item in shown:
    print(item)

# Two separator rows, then each element of the JSON stored in messages[2]
# (presumably the reference annotation for this example).
divider = "#######################"
print(divider)
print(divider)
reference = json.loads(testset[22]['messages'][2]['content'])
for entry in reference:
    print(entry)

{'peptide_sequence': 'Fmoc-Phe-Phe-Asp', 'self-assembly_phase': 'nanofibers', 'N-terminal_modification': 'N-(9-Fluorenylmethoxycarbonyl)-protected', 'C-terminal_modification': 'OH', 'Non-terminal_modification': '', 'peptide_conjugate_mixture': '', 'conjugate_partner': '', 'solution': 'ultra-pure H2O', 'PH': '8', 'temperature(celsius)': 25, 'peptide_concentration(mg/ml)': '8 mm', 'heating_cooling': False, 'incubation_period(min)': 30}
{'peptide_sequence': 'Fmoc-Phe-Phe-Asp', 'self-assembly_phase': 'nanoparticles', 'N-terminal_modification': 'N-(9-Fluorenylmethoxycarbonyl)-protected', 'C-terminal_modification': 'OH', 'Non-terminal_modification': '', 'peptide_conjugate_mixture': '', 'conjugate_partner': '', 'solution': 'ultra-pure H2O', 'PH': '7', 'temperature(celsius)': 55, 'peptide_concentration(mg/ml)': '8 mm', 'heating_cooling': True, 'incubation_period(min)': 120}
#######################
#######################
{'peptide_sequence': 'FFD', 'self-assembly_phase': 'fiber', 'N-terminal_m