from k1lib.imports import *
This notebook tries to download the COVID-19 (SARS-CoV-2) spike protein structure file (a .cif
file), then scouts out the file's structure, looks at some basic stats, and finally tries to plot it in 3D.
Link to spike protein: https://www.rcsb.org/structure/6vxx
File overview:
"6VXX.cif" | cat() | headOut(30)
data_6VXX # _entry.id 6VXX # _audit_conform.dict_location http://mmcif.pdb.org/dictionaries/ascii/mmcif_pdbx.dic _audit_conform.dict_name mmcif_pdbx.dic _audit_conform.dict_version 5.337 # loop_ _database_2.database_id _database_2.database_code pdb 6VXX wwpdb D_1000247293 emdb EMD-21452 # loop_ _pdbx_database_related.db_name _pdbx_database_related.details _pdbx_database_related.db_id _pdbx_database_related.content_type EMDB 'Structure of the SARS-CoV-2 spike glycoprotein (closed state)' EMD-21452 'associated EM volume' EMDB 'Structure of the SARS-CoV-2 spike glycoprotein (open state)' EMD-21457 'other EM volume' # _pdbx_database_status.status_code REL _pdbx_database_status.status_code_sf ? _pdbx_database_status.status_code_mr ? _pdbx_database_status.entry_id 6VXX _pdbx_database_status.recvd_initial_deposition_date 2020-02-25 _pdbx_database_status.SG_entry y _pdbx_database_status.deposit_site RCSB
So, the file appears to be delimited by "#", and the blocks with the "loop_" tag describe a table structure with the specified columns. Let's separate out those blocks:
# Split the file into blocks, one per "#" delimiter line, and cache the parsed result in a
# k1.Wrapper so that calling `blocks()` hands back the already-parsed data.
# NOTE(review): presumably grep("^#", sep=True).till() groups each "#" line with the lines that follow it
# (up to the next "#") — confirm against the k1lib docs.
# The trailing tuple is a sanity check: the block at index 1, and the overall shape of the block list.
blocks = "6VXX.cif" | cat() | grep("^#", sep=True).till() | deref() | aS(k1.Wrapper); blocks() | rows(1) | item(), blocks() | shape()
(['#', '_audit_conform.dict_location http://mmcif.pdb.org/dictionaries/ascii/mmcif_pdbx.dic ', '_audit_conform.dict_name mmcif_pdbx.dic ', '_audit_conform.dict_version 5.337 '], (74, 2, 1))
Only 36 of them are in fact describing a table:
# Reusable filter: keep only the blocks that contain at least one line matching "loop_",
# i.e. the blocks that declare a table.
hasTable = filt(grep("loop_") | shape(0) | (op() > 0))
# Shape of the surviving blocks; the leading number is how many blocks declare a table.
blocks() | hasTable | shape()
(36, 7, 1)
Let's grab the tables' fields:
# For every table block keep only the lines starting with "_" (the field names), prefix each
# block with a "section <index>" label, then lay the blocks out side by side (one column per
# section) and display the whole thing.
# NOTE(review): presumably insertIdColumn(True) prepends the block index and transpose(fill="")
# pads the shorter sections with empty strings — confirm against the k1lib docs.
blocks() | hasTable | filt(op().startswith("_")).all() | insertIdColumn(True) | apply("section " + op().ab_str(), 0)\
| transpose(fill="") | deref() | display(None)
section 0 section 1 section 2 section 3 section 4 section 5 section 6 section 7 section 8 section 9 section 10 section 11 section 12 section 13 section 14 section 15 section 16 section 17 section 18 section 19 section 20 section 21 section 22 section 23 section 24 section 25 section 26 section 27 section 28 section 29 section 30 section 31 section 32 section 33 section 34 section 35 _database_2.database_id _pdbx_database_related.db_name _audit_author.name _citation_author.citation_id _entity.details _entity_poly_seq.entity_id _struct_ref_seq.align_id _struct_ref_seq_dif.align_id _chem_comp.formula _struct_asym.details _struct_conf.conf_type_id _struct_conn.conn_type_id _struct_conn_type.criteria _struct_sheet.id _struct_sheet_order.sheet_id _struct_sheet_range.sheet_id _pdbx_struct_sheet_hbond.sheet_id _atom_type.symbol _atom_site.group_PDB _pdbx_poly_seq_scheme.asym_id _pdbx_nonpoly_scheme.asym_id _pdbx_struct_assembly_prop.biol_id _pdbx_audit_revision_history.ordinal _pdbx_audit_revision_details.ordinal _pdbx_audit_revision_group.ordinal _pdbx_audit_revision_category.ordinal _pdbx_audit_revision_item.ordinal _pdbx_validate_rmsd_bond.id _pdbx_validate_rmsd_angle.id _pdbx_validate_torsion.id _pdbx_unobs_or_zero_occ_residues.id _em_software.id _pdbx_branch_scheme.entity_id _pdbx_chem_comp_identifier.comp_id _pdbx_entity_branch_descriptor.entity_id _pdbx_entity_branch_list.entity_id _database_2.database_code _pdbx_database_related.details _audit_author.pdbx_ordinal _citation_author.name _entity.formula_weight _entity_poly_seq.hetero _struct_ref_seq.ref_id _struct_ref_seq_dif.pdbx_pdb_id_code _chem_comp.formula_weight _struct_asym.entity_id _struct_conf.id _struct_conn.details _struct_conn_type.id _struct_sheet.type _struct_sheet_order.range_id_1 _struct_sheet_range.id _pdbx_struct_sheet_hbond.range_id_1 _atom_site.id _pdbx_poly_seq_scheme.entity_id _pdbx_nonpoly_scheme.entity_id _pdbx_struct_assembly_prop.type _pdbx_audit_revision_history.data_content_type 
_pdbx_audit_revision_details.revision_ordinal _pdbx_audit_revision_group.revision_ordinal _pdbx_audit_revision_category.revision_ordinal _pdbx_audit_revision_item.revision_ordinal _pdbx_validate_rmsd_bond.PDB_model_num _pdbx_validate_rmsd_angle.PDB_model_num _pdbx_validate_torsion.PDB_model_num _pdbx_unobs_or_zero_occ_residues.polymer_flag _em_software.category _pdbx_branch_scheme.hetero _pdbx_chem_comp_identifier.identifier _pdbx_entity_branch_descriptor.descriptor _pdbx_entity_branch_list.hetero _pdbx_database_related.db_id _audit_author.identifier_ORCID _citation_author.ordinal _entity.id _entity_poly_seq.mon_id _struct_ref_seq.pdbx_PDB_id_code _struct_ref_seq_dif.mon_id _chem_comp.id _struct_asym.id _struct_conf.beg_label_comp_id _struct_conn.id _struct_conn_type.reference _struct_sheet.number_strands _struct_sheet_order.range_id_2 _struct_sheet_range.beg_label_comp_id _pdbx_struct_sheet_hbond.range_id_2 _atom_site.type_symbol _pdbx_poly_seq_scheme.seq_id _pdbx_nonpoly_scheme.mon_id _pdbx_struct_assembly_prop.value _pdbx_audit_revision_history.major_revision _pdbx_audit_revision_details.data_content_type _pdbx_audit_revision_group.data_content_type _pdbx_audit_revision_category.data_content_type _pdbx_audit_revision_item.data_content_type _pdbx_validate_rmsd_bond.auth_atom_id_1 _pdbx_validate_rmsd_angle.auth_atom_id_1 _pdbx_validate_torsion.auth_comp_id _pdbx_unobs_or_zero_occ_residues.occupancy_flag _em_software.details _pdbx_branch_scheme.asym_id _pdbx_chem_comp_identifier.type _pdbx_entity_branch_descriptor.type _pdbx_entity_branch_list.comp_id _pdbx_database_related.content_type _entity.src_method _entity_poly_seq.num _struct_ref_seq.pdbx_strand_id _struct_ref_seq_dif.pdbx_pdb_strand_id _chem_comp.mon_nstd_flag _struct_asym.pdbx_modified _struct_conf.beg_label_seq_id _struct_conn.ptnr1_label_asym_id _struct_sheet.details _struct_sheet_order.offset _struct_sheet_range.beg_label_seq_id _pdbx_struct_sheet_hbond.range_1_label_atom_id _atom_site.label_atom_id 
_pdbx_poly_seq_scheme.mon_id _pdbx_nonpoly_scheme.pdb_strand_id _pdbx_struct_assembly_prop.details _pdbx_audit_revision_history.minor_revision _pdbx_audit_revision_details.provider _pdbx_audit_revision_group.group _pdbx_audit_revision_category.category _pdbx_audit_revision_item.item _pdbx_validate_rmsd_bond.auth_asym_id_1 _pdbx_validate_rmsd_angle.auth_asym_id_1 _pdbx_validate_torsion.auth_asym_id _pdbx_unobs_or_zero_occ_residues.PDB_model_num _em_software.name _pdbx_branch_scheme.mon_id _pdbx_chem_comp_identifier.program _pdbx_entity_branch_descriptor.program _pdbx_entity_branch_list.num _entity.type _struct_ref_seq.seq_align_beg _struct_ref_seq_dif.seq_num _chem_comp.name _struct_asym.pdbx_blank_PDB_chainid_flag _struct_conf.pdbx_beg_PDB_ins_code _struct_conn.ptnr1_label_atom_id _struct_sheet_order.sense _struct_sheet_range.pdbx_beg_PDB_ins_code _pdbx_struct_sheet_hbond.range_1_label_comp_id _atom_site.label_comp_id _pdbx_poly_seq_scheme.ndb_seq_num _pdbx_nonpoly_scheme.ndb_seq_num _pdbx_audit_revision_history.revision_date _pdbx_audit_revision_details.type _pdbx_validate_rmsd_bond.auth_comp_id_1 _pdbx_validate_rmsd_angle.auth_comp_id_1 _pdbx_validate_torsion.auth_seq_id _pdbx_unobs_or_zero_occ_residues.auth_asym_id _em_software.version _pdbx_branch_scheme.num _pdbx_chem_comp_identifier.program_version _pdbx_entity_branch_descriptor.program_version _entity.pdbx_description _struct_ref_seq.pdbx_seq_align_beg_ins_code _struct_ref_seq_dif.pdbx_pdb_ins_code _chem_comp.type _struct_conf.beg_label_asym_id _struct_conn.ptnr1_label_comp_id _struct_sheet_range.beg_label_asym_id _pdbx_struct_sheet_hbond.range_1_label_asym_id _atom_site.label_seq_id _pdbx_poly_seq_scheme.pdb_seq_num _pdbx_nonpoly_scheme.pdb_seq_num _pdbx_audit_revision_details.description _pdbx_validate_rmsd_bond.auth_seq_id_1 _pdbx_validate_rmsd_angle.auth_seq_id_1 _pdbx_validate_torsion.PDB_ins_code _pdbx_unobs_or_zero_occ_residues.auth_comp_id _em_software.image_processing_id 
_pdbx_branch_scheme.pdb_asym_id _pdbx_entity_branch_descriptor.ordinal _entity.pdbx_number_of_molecules _struct_ref_seq.seq_align_end _struct_ref_seq_dif.pdbx_seq_db_name _chem_comp.pdbx_synonyms _struct_conf.beg_label_entity_id _struct_conn.ptnr1_label_seq_id _struct_sheet_range.beg_label_entity_id _pdbx_struct_sheet_hbond.range_1_label_seq_id _atom_site.label_alt_id _pdbx_poly_seq_scheme.auth_seq_num _pdbx_nonpoly_scheme.auth_seq_num _pdbx_audit_revision_details.details _pdbx_validate_rmsd_bond.PDB_ins_code_1 _pdbx_validate_rmsd_angle.PDB_ins_code_1 _pdbx_validate_torsion.label_alt_id _pdbx_unobs_or_zero_occ_residues.auth_seq_id _em_software.fitting_id _pdbx_branch_scheme.pdb_seq_num _entity.pdbx_mutation _struct_ref_seq.pdbx_seq_align_end_ins_code _struct_ref_seq_dif.pdbx_seq_db_accession_code _struct_conf.beg_auth_comp_id _struct_conn.ptnr1_auth_asym_id _struct_sheet_range.beg_auth_comp_id _pdbx_struct_sheet_hbond.range_1_PDB_ins_code _atom_site.pdbx_PDB_ins_code _pdbx_poly_seq_scheme.pdb_mon_id _pdbx_nonpoly_scheme.pdb_mon_id _pdbx_validate_rmsd_bond.label_alt_id_1 _pdbx_validate_rmsd_angle.label_alt_id_1 _pdbx_validate_torsion.phi _pdbx_unobs_or_zero_occ_residues.PDB_ins_code _em_software.imaging_id _pdbx_branch_scheme.pdb_mon_id _entity.pdbx_fragment _struct_ref_seq.pdbx_db_accession _struct_ref_seq_dif.db_mon_id _struct_conf.beg_auth_seq_id _struct_conn.ptnr1_auth_comp_id _struct_sheet_range.beg_auth_seq_id _pdbx_struct_sheet_hbond.range_1_auth_atom_id _atom_site.label_asym_id _pdbx_poly_seq_scheme.auth_mon_id _pdbx_nonpoly_scheme.auth_mon_id _pdbx_validate_rmsd_bond.auth_atom_id_2 _pdbx_validate_rmsd_angle.auth_atom_id_2 _pdbx_validate_torsion.psi _pdbx_unobs_or_zero_occ_residues.label_asym_id _pdbx_branch_scheme.auth_asym_id _entity.pdbx_ec _struct_ref_seq.db_align_beg _struct_ref_seq_dif.pdbx_seq_db_seq_num _struct_conf.beg_auth_asym_id _struct_conn.ptnr1_auth_seq_id _struct_sheet_range.beg_auth_asym_id _pdbx_struct_sheet_hbond.range_1_auth_comp_id 
_atom_site.label_entity_id _pdbx_poly_seq_scheme.pdb_strand_id _pdbx_nonpoly_scheme.pdb_ins_code _pdbx_validate_rmsd_bond.auth_asym_id_2 _pdbx_validate_rmsd_angle.auth_asym_id_2 _pdbx_unobs_or_zero_occ_residues.label_comp_id _pdbx_branch_scheme.auth_seq_num _struct_ref_seq.pdbx_db_align_beg_ins_code _struct_ref_seq_dif.details _struct_conf.end_label_comp_id _struct_conn.ptnr1_symmetry _struct_sheet_range.end_label_comp_id _pdbx_struct_sheet_hbond.range_1_auth_asym_id _atom_site.Cartn_x _pdbx_poly_seq_scheme.pdb_ins_code _pdbx_validate_rmsd_bond.auth_comp_id_2 _pdbx_validate_rmsd_angle.auth_comp_id_2 _pdbx_unobs_or_zero_occ_residues.label_seq_id _pdbx_branch_scheme.auth_mon_id _struct_ref_seq.db_align_end _struct_ref_seq_dif.pdbx_auth_seq_num _struct_conf.end_label_seq_id _struct_conn.ptnr2_label_asym_id _struct_sheet_range.end_label_seq_id _pdbx_struct_sheet_hbond.range_1_auth_seq_id _atom_site.Cartn_y _pdbx_poly_seq_scheme.hetero _pdbx_validate_rmsd_bond.auth_seq_id_2 _pdbx_validate_rmsd_angle.auth_seq_id_2 _struct_ref_seq.pdbx_db_align_end_ins_code _struct_ref_seq_dif.pdbx_ordinal _struct_conf.pdbx_end_PDB_ins_code _struct_conn.ptnr2_label_atom_id _struct_sheet_range.pdbx_end_PDB_ins_code _pdbx_struct_sheet_hbond.range_2_label_atom_id _atom_site.Cartn_z _pdbx_validate_rmsd_bond.PDB_ins_code_2 _pdbx_validate_rmsd_angle.PDB_ins_code_2 _struct_ref_seq.pdbx_auth_seq_align_beg _struct_conf.end_label_asym_id _struct_conn.ptnr2_label_comp_id _struct_sheet_range.end_label_asym_id _pdbx_struct_sheet_hbond.range_2_label_comp_id _atom_site.occupancy _pdbx_validate_rmsd_bond.label_alt_id_2 _pdbx_validate_rmsd_angle.label_alt_id_2 _struct_ref_seq.pdbx_auth_seq_align_end _struct_conf.end_label_entity_id _struct_conn.ptnr2_label_seq_id _struct_sheet_range.end_label_entity_id _pdbx_struct_sheet_hbond.range_2_label_asym_id _atom_site.B_iso_or_equiv _pdbx_validate_rmsd_bond.bond_value _pdbx_validate_rmsd_angle.auth_atom_id_3 _struct_conf.end_auth_comp_id 
_struct_conn.ptnr2_auth_asym_id _struct_sheet_range.end_auth_comp_id _pdbx_struct_sheet_hbond.range_2_label_seq_id _atom_site.pdbx_formal_charge _pdbx_validate_rmsd_bond.bond_target_value _pdbx_validate_rmsd_angle.auth_asym_id_3 _struct_conf.end_auth_seq_id _struct_conn.ptnr2_auth_comp_id _struct_sheet_range.end_auth_seq_id _pdbx_struct_sheet_hbond.range_2_PDB_ins_code _atom_site.auth_atom_id _pdbx_validate_rmsd_bond.bond_deviation _pdbx_validate_rmsd_angle.auth_comp_id_3 _struct_conf.end_auth_asym_id _struct_conn.ptnr2_auth_seq_id _struct_sheet_range.end_auth_asym_id _pdbx_struct_sheet_hbond.range_2_auth_atom_id _atom_site.auth_comp_id _pdbx_validate_rmsd_bond.bond_standard_deviation _pdbx_validate_rmsd_angle.auth_seq_id_3 _struct_conf.pdbx_PDB_helix_class _struct_conn.ptnr2_symmetry _struct_sheet_range.symmetry _pdbx_struct_sheet_hbond.range_2_auth_comp_id _atom_site.auth_seq_id _pdbx_validate_rmsd_bond.linker_flag _pdbx_validate_rmsd_angle.PDB_ins_code_3 _struct_conf.details _struct_conn.pdbx_ptnr1_PDB_ins_code _pdbx_struct_sheet_hbond.range_2_auth_asym_id _atom_site.auth_asym_id _pdbx_validate_rmsd_angle.label_alt_id_3 _struct_conf.pdbx_PDB_helix_length _struct_conn.pdbx_ptnr1_label_alt_id _pdbx_struct_sheet_hbond.range_2_auth_seq_id _atom_site.pdbx_PDB_model_num _pdbx_validate_rmsd_angle.angle_value _struct_conn.pdbx_ptnr1_standard_comp_id _pdbx_validate_rmsd_angle.angle_target_value _struct_conn.pdbx_ptnr2_PDB_ins_code _pdbx_validate_rmsd_angle.angle_deviation _struct_conn.pdbx_ptnr2_label_alt_id _pdbx_validate_rmsd_angle.angle_standard_deviation _struct_conn.pdbx_ptnr3_PDB_ins_code _pdbx_validate_rmsd_angle.linker_flag _struct_conn.pdbx_ptnr3_label_alt_id _struct_conn.pdbx_ptnr3_label_asym_id _struct_conn.pdbx_ptnr3_label_atom_id _struct_conn.pdbx_ptnr3_label_comp_id _struct_conn.pdbx_ptnr3_label_seq_id _struct_conn.pdbx_PDB_id _struct_conn.pdbx_dist_value _struct_conn.pdbx_value_order
# Bar chart of how many lines each table block has, indexed by section number.
# NOTE(review): shape(0).all() counts every line of a block, so the "#", "loop_" and field-name
# lines are presumably included in each bar's height — confirm if exact row counts matter.
blocks() | hasTable | shape(0).all() | insertIdColumn() | transpose() | ~aS(plt.bar);
# Log scale on the y axis, since the bar heights span a wide range.
plt.yscale("log"); plt.xlabel("Section"); plt.ylabel("Size of table"); #plt.grid(True)
Section 18 does look interesting. Let's check it out:
# Pick the table block at index 18 and print its first 30 lines.
blocks() | hasTable | rows(18) | item() | headOut(30)
# loop_ _atom_site.group_PDB _atom_site.id _atom_site.type_symbol _atom_site.label_atom_id _atom_site.label_comp_id _atom_site.label_seq_id _atom_site.label_alt_id _atom_site.pdbx_PDB_ins_code _atom_site.label_asym_id _atom_site.label_entity_id _atom_site.Cartn_x _atom_site.Cartn_y _atom_site.Cartn_z _atom_site.occupancy _atom_site.B_iso_or_equiv _atom_site.pdbx_formal_charge _atom_site.auth_atom_id _atom_site.auth_comp_id _atom_site.auth_seq_id _atom_site.auth_asym_id _atom_site.pdbx_PDB_model_num ATOM 1 N N ALA 46 . . A 1 171.646 251.874 224.877 1 34.23 ? N ALA 27 A 1 ATOM 2 C CA ALA 46 . . A 1 172.298 252.181 223.613 1 34.95 ? CA ALA 27 A 1 ATOM 3 C C ALA 46 . . A 1 173.53 251.298 223.427 1 34.25 ? C ALA 27 A 1 ATOM 4 O O ALA 46 . . A 1 174.195 250.943 224.405 1 34.3 ? O ALA 27 A 1 ATOM 5 C CB ALA 46 . . A 1 172.7 253.664 223.554 1 35.54 ? CB ALA 27 A 1 ATOM 6 N N TYR 47 . . A 1 173.816 250.939 222.166 1 34.21 ? N TYR 28 A 1 ATOM 7 C CA TYR 47 . . A 1 174.968 250.129 221.763 1 34.2 ? CA TYR 28 A 1
This does look very promising: these seem to be the exact coordinates of the individual atoms.
# Number of field-name lines (those starting with "_") in table block 18, i.e. how many
# columns that table declares. Used below to skip past the table header.
fields = blocks() | hasTable | rows(18) | item() | filt(op().startswith("_")) | shape(0); fields
21
# Skip the header of block 18 — the "#" line, the "loop_" line and the `fields` field-name
# lines, hence fields+2 — then split each remaining data line on single spaces and preview it.
# NOTE(review): transpose.wrap(insertIdColumn(True)) appears to add a header row of column
# indices, which makes it easy to pick column numbers for cut() — confirm against the k1lib docs.
blocks() | hasTable | rows(18) | item() | ~head(fields+2) | op().split(" ").all() | transpose.wrap(insertIdColumn(True)) | display()
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 ATOM 1 N N ALA 46 . . A 1 171.646 251.874 224.877 1 34.23 ? N ALA 27 A 1 ATOM 2 C CA ALA 46 . . A 1 172.298 252.181 223.613 1 34.95 ? CA ALA 27 A 1 ATOM 3 C C ALA 46 . . A 1 173.53 251.298 223.427 1 34.25 ? C ALA 27 A 1 ATOM 4 O O ALA 46 . . A 1 174.195 250.943 224.405 1 34.3 ? O ALA 27 A 1 ATOM 5 C CB ALA 46 . . A 1 172.7 253.664 223.554 1 35.54 ? CB ALA 27 A 1 ATOM 6 N N TYR 47 . . A 1 173.816 250.939 222.166 1 34.21 ? N TYR 28 A 1 ATOM 7 C CA TYR 47 . . A 1 174.968 250.129 221.763 1 34.2 ? CA TYR 28 A 1 ATOM 8 C C TYR 47 . . A 1 175.652 250.729 220.561 1 34.22 ? C TYR 28 A 1 ATOM 9 O O TYR 47 . . A 1 175.009 251.379 219.736 1 34.22 ? O TYR 28 A 1
# Build the per-atom coordinate table from block 18 and cache it in a k1.Wrapper.
# Columns 1, 2, 10, 11, 12 of each data row are, going by the field order in the table header,
# _atom_site.id, _atom_site.type_symbol and _atom_site.Cartn_x / Cartn_y / Cartn_z.
# After the first cut: column 0 (id) is parsed as int, columns 2-4 (x, y, z) as float.
# The second cut drops the id, so each row of `coors()` is (type_symbol, x, y, z).
# NOTE(review): how toInt/toFloat treat rows that fail to parse is not visible here — check the k1lib docs.
coors = blocks() | hasTable | rows(18) | item() | ~head(fields+2) | op().split(" ").all() | cut(1, 2, 10, 11, 12) | toInt(0)\
| toFloat(2, 3, 4) | cut(1, 2, 3, 4) | deref() | aS(k1.Wrapper)
# Side length of the cubic viewing window for the 3D plot: take the (min, max) of each of
# the x, y, z columns, compute each axis' extent (max - min), keep the largest extent and
# scale it by 0.7.
# NOTE(review): the 0.7 factor makes the window smaller than the full extent, so the outermost
# atoms may fall outside the plot limits — presumably a deliberate zoom; confirm.
length = coors() | cut(1, 2, 3) | transpose() | (toMin() & toMax()).all() | ~apply(lambda a, b: b-a) | toMax() | op()*0.7; length
111.5765
# Axis limits for the 3D plot: a window of side `length`, centred on the midpoint of each axis.
# Steps: per-axis midpoint (min+max)/2 -> tensor of 3 centres -> duplicated into 2 rows
# (expand(2, 3)) -> offsets of -length/2 and +length/2 -> transposed into one [low, high]
# pair per axis -> converted back to plain Python floats.
# NOTE(review): presumably `+` between the two op() pipelines applies the first to row 0 and the
# second to row 1 (rather than adding them), which matches the [low, high] pairs in the output — confirm against the k1lib docs.
lims = coors() | cut(1, 2, 3) | transpose() | (toMin() & toMax()).all() | ~apply(lambda a, b: (a+b)/2) | deref() | aS(torch.tensor)\
| op()[None].expand(2, 3) | ((op() - length/2) + (op() + length/2)) | transpose() | op().item().all(2) | deref(); lims
[[165.69174194335938, 277.26824951171875], [153.2052459716797, 264.78173828125], [139.60523986816406, 251.18174743652344]]
Atom types:
# Count how many atoms there are of each type symbol (column 0 of `coors()`), sort the counts
# in descending order and display them.
coors() | cut(0) | count() | ~sort() | display()
15087 C 64% 4659 O 20% 3849 N 16% 99 S 0%
# thumbnail
# NOTE(review): the "# thumbnail" line above looks like a marker for notebook tooling — left untouched.
# Create the 3D axes and list the type symbols to draw, one scatter series per symbol.
# NOTE(review): plt.k3d() and plt.animate() are not stock matplotlib.pyplot functions; presumably
# k1lib adds them (3D axes creation and animation of the figure) — confirm against the k1lib docs.
ax = plt.k3d(); atomTypes = ["C", "O", "N", "S"]
# For each type symbol: keep the rows of `coors()` with that symbol, drop the symbol column,
# transpose into separate x / y / z sequences and plot them as small, semi-transparent dots.
# The trailing ignore() forces the lazy pipeline to actually run.
atomTypes | apply(aS(lambda x: coors() | filt(op()[0] == x) | cut()[1:] | transpose()) | ~aS(ax.plot3D, "o", markersize=1, alpha=0.5)) | ignore()
# Apply the same-sized window to every axis; legend entries follow the plotting order of atomTypes.
ax.set_xlim(*lims[0]); ax.set_ylim(*lims[1]); ax.set_zlim(*lims[2]); plt.legend(atomTypes)
plt.animate(6)