| Home | Trees | Indices | Help |
|
|---|
|
|
1 """
2 APIs for working with protein structure fragments and libraries.
3
4 This package contains the nuts and bolts of HHfrag. Everything here revolves
5 around the L{Target} class, which describes a protein structure prediction
6 target. One typically assigns fragments (L{Assignment}s) to the target and then
7 builds a fragment library with L{RosettaFragsetFactory}.
8
9 @note: Internal or legacy objects are intentionally left undocumented.
10 This typically indicates experimental code.
11 """
12
13 import os
14 import numpy
15
16 import csb.io
17 import csb.core
18 import csb.bio.utils
19 import csb.bio.structure
20 import csb.bio.sequence
21 from csb.bio.structure import SecondaryStructure
25
26 ISites = 'IS'
27 HMMFragments = 'HH'
28 HHThread = 'TH'
29 HHfrag = HHThread
30 Rosetta = 'NN'
31
37
# RMSD normalization constants, keyed by fragment length in residues (5..54).
# Used below as a denominator when normalizing the pairwise CA RMSD of two
# fragments over an overlap of the given length; presumably these are expected
# RMSD values for random fragment pairs of that length -- TODO confirm.
RANDOM_RMSD = {
     5: 1.8749005857255376,   6: 2.4314283686276261,   7: 2.9021135267789608,
     8: 3.2477716200172715,   9: 3.5469606556031708,  10: 3.8295465524456329,
    11: 4.1343107114131783,  12: 4.3761697929053014,  13: 4.6707299668248394,
    14: 4.9379016881069733,  15: 5.1809028645084911,  16: 5.4146957142595662,
    17: 5.7135948448156988,  18: 5.9597935432566782,  19: 6.1337340535741962,
    20: 6.3962825155503271,  21: 6.6107937773415166,  22: 6.8099096274123401,
    23: 7.0435583846849639,  24: 7.2160956482560970,  25: 7.4547896324594962,
    26: 7.6431870072434211,  27: 7.8727812194173836,  28: 8.0727393298443637,
    29: 8.2551450998965326,  30: 8.4413583511786587,  31: 8.5958719774122052,
    32: 8.7730435506242408,  33: 8.9970648837941649,  34: 9.1566521405105163,
    35: 9.2828620878454728,  36: 9.4525824357923405,  37: 9.6322126445253300,
    38: 9.7851684750961176,  39: 9.9891454649821476,  40: 10.124373939352028,
    41: 10.284348528344765,  42: 10.390457305096271,  43: 10.565792044674239,
    44: 10.676532740033737,  45: 10.789537132283652,  46: 11.004475543757550,
    47: 11.064541647783571,  48: 11.231219875286985,  49: 11.319222637391441,
    50: 11.485478165340824,  51: 11.607522494435521,  52: 11.700268836069840,
    53: 11.831245255954073,  54: 11.918975893263905,
}
49 """
50 Base class, representing a match between a fragment and its target.
51 """
52
54
55 self._id = id
56 self._qstart = qstart
57 self._qend = qend
58 self._probability = probability
59 self._rmsd = rmsd
60 self._tm_score = tm_score
61 self._qlength = qlength
62
63 @property
66
67 @property
70
71 @property
74
75 @property
78
79 @property
82
83 @property
86
87 @property
90
91 @property
94
95 @property
98
99 @property
102
103 @property
106
116
123
125 """
126 Fragment-based phi/psi angles predictor.
127
128 @param target: target protein, containing fragment assignments
129 @type target: L{Target}
130 @param threshold: RMSD distance threshold for L{FragmentCluster}-based filtering
131 @type threshold: float
132 @param extend: pick alternative, longer cluster reps, if possible
133 @type extend: bool
134 @param init: populate all L{FragmentCluster}s on instantiation. If False, this step
135 will be performed on demand (the first time C{predictor.compute()} is invoked)
136
137 @note: if C{init} is False, the first call to C{predictor.compute()} might take a long
138 time. Subsequent calls will be very fast.
139 """
140
142
143 if not isinstance(target, Target):
144 raise TypeError(target)
145 if target.matches.length == 0:
146 raise ValueError('This target has no fragment assignments')
147
148 self._target = target
149 self._threshold = float(threshold)
150 self._extend = bool(extend)
151
152 self._initialized = False
153 self._reps = {}
154 self._clusters = {}
155
156 if init:
157 self.init()
158
159 @property
162
163 @property
166
167 @property
170
172 """
173 Compute and cache all L{FragmentCluster}s.
174 """
175
176 self._reps = {}
177 self._clusters = {}
178
179 for residue in self.target.residues:
180 cluster = self._filter(residue)
181
182 if cluster is not None:
183 rep = cluster.centroid()
184 if rep.has_alternative:
185 rep.exchange()
186
187 self._reps[residue.native.rank] = rep
188 self._clusters[residue.native.rank] = cluster.items
189
190 self._initialized = True
191
193
194 try:
195 nodes = []
196 for ai in residue.assignments:
197 node = ClusterNode.create(ai.fragment)
198 nodes.append(node)
199
200 cluster = FragmentCluster(nodes, threshold=self.threshold)
201 cluster.shrink(minitems=0)
202
203 return cluster
204
205 except (ClusterExhaustedError, ClusterDivergingError):
206 return None
207
209
210 for r in self._target.residues:
211 if r.native.rank == rank:
212 return r
213
214 raise ValueError('Rank {0} is out of range'.format(rank))
215
217 """
218 Extract torsion angles from the L{ClusterRep} at residue C{#rank}.
219
220 @param rank: target residue rank
221 @type rank: int
222
223 @rtype: L{TorsionPredictionInfo}
224 """
225
226 residue = self._residue(rank)
227 rep = residue.filter(threshold=self.threshold, extend=self.extend)
228
229 if rep is None:
230 return None
231
232 else:
233 fragment = rep.centroid
234 torsion = fragment.torsion_at(rank, rank)[0]
235 ss = fragment.sec_structure_at(rank, rank)[0]
236
237 return TorsionPredictionInfo(rank, rep.confidence, torsion, ss, primary=True)
238
240 """
241 Extract torsion angles from all L{ClusterRep}s, covering residue C{#rank}.
242
243 @param rank: target residue rank
244 @type rank: int
245
246 @return: L{TorsionPredictionInfo} instances, sorted by confidence
247 @rtype: tuple of L{TorsionPredictionInfo}
248 """
249
250 if not self._initialized:
251 self.init()
252
253 prediction = []
254
255 for rep in self._reps.values():
256
257 if rep.centroid.qstart <= rank <= rep.centroid.qend:
258
259 fragment = rep.centroid
260 torsion = fragment.torsion_at(rank, rank)[0]
261 ss = fragment.sec_structure_at(rank, rank)[0]
262 info = TorsionPredictionInfo(rank, rep.confidence, torsion, ss)
263
264 if rep is self._reps.get(rank, None):
265 info.primary = True
266
267 prediction.append(info)
268
269 prediction.sort(reverse=True)
270 return tuple(prediction)
271
273 """
274 Filter the current fragment map and create a new, completely flat,
275 non-overlapping map built from centroids, assigned iteratively by
276 decreasing confidence. Centroids with lower confidence which overlap
277 with previously assigned centroids will be trimmed to fill existing
278 gaps only.
279
280 @return: L{TorsionPredictionInfo} instances, one for each target residue
281 @rtype: tuple of L{TorsionPredictionInfo}
282 """
283
284 if not self._initialized:
285 self.init()
286
287 prediction = []
288 slots = set(range(1, self.target.length + 1))
289
290 reps = list(self._reps.values())
291 reps.sort(key=lambda i: i.confidence, reverse=True)
292
293 for rep in reps:
294
295 for rank in range(rep.centroid.qstart, rep.centroid.qend + 1):
296 if rank in slots:
297 torsion = rep.centroid.torsion_at(rank, rank)[0]
298 ss = rep.centroid.sec_structure_at(rank, rank)[0]
299 info = TorsionPredictionInfo(rank, rep.confidence, torsion, ss, primary=True)
300
301 prediction.append(info)
302 slots.remove(rank)
303
304 for rank in slots:
305 prediction.append(TorsionPredictionInfo(rank, 0, None))
306
307 prediction.sort(key=lambda i: i.rank)
308 return tuple(prediction)
309
311 """
312 Extract all torsion angles coming from all fragments, which had survived
313 the filtering and cover residue C{#rank}.
314
315 @param rank: target residue rank
316 @type rank: int
317
318 @return: all L{TorsionAngles} for a cluster at the specified residue
319 @rtype: tuple of L{TorsionAngles}
320 """
321
322 if not self._initialized:
323 self.init()
324 if rank not in self._clusters:
325 return tuple()
326
327 angles = []
328
329 for node in self._clusters[rank]:
330 fragment = node.fragment
331 torsion = fragment.torsion_at(rank, rank)[0]
332 angles.append(torsion)
333
334 return tuple(angles)
335
338 """
339 Struct container for a single torsion angle prediction.
340
341 @param rank: target residue rank
342 @type rank: int
343 @param confidence: confidence of prediction
344 @type confidence: float
345 @param torsion: assigned phi/psi/omega angles
346 @type torsion: L{TorsionAngles}
347 @param dssp: assigned secondary structure
348 @type dssp: L{SecondaryStructureElement}
349 @param primary: if True, designates that the assigned angles are extracted
350 from the L{ClusterRep} at residue C{#rank}; otherwise: the
351 angles are coming from another, overlapping L{ClusterRep}
352
353 """
354
356
357 self.rank = rank
358 self.confidence = confidence
359 self.torsion = torsion
360 self.primary = primary
361 self.dssp = dssp
362
364 """
365 @return: convert this prediction to a tuple: (confidence, phi, psi, omega)
366 @rtype: tuple
367 """
368 return tuple([self.confidence, self.torsion.phi, self.torsion.psi, self.torsion.omega])
369
371 return '<TorsionPredictionInfo: {0.confidence:6.3f} at #{0.rank}>'.format(self)
372
375
387
389 """
390 Represents a protein structure prediction target.
391
392 @param id: target sequence ID, in PDB accnC format
393 @type id: str
394 @param length: total target sequence length
395 @type length: int
396 @param residues: a list, containing target's residues. See also
397 L{Target.from_sequence}
398 @type residues: iterable of L{csb.bio.structure.ProteinResidue}s
399 """
400
def __init__(self, id, length, residues, overlap=None, segments=None, factory=AssignmentFactory()):
    """
    Initialize the prediction target from its sequence ID and native residues.

    NOTE(review): the default C{factory} instance is created once, when the
    class is defined, and is therefore shared by every call that relies on
    the default -- presumably AssignmentFactory is stateless; confirm before
    adding state to it.
    """
    self._id = id
    # the ID is expected in PDB accnC format: accession + trailing chain letter
    self._accession = id[:-1]
    self._chain_id = id[-1]
    self._length = length
    self._overlap = overlap
    self._factory = factory

    self._assignments = csb.core.ReadOnlyCollectionContainer(type=Assignment)
    self._errors = csb.core.CollectionContainer()

    # wrap each native residue in a fragment-aware TargetResidue decorator
    wrapped = [factory.residue(native) for native in residues]
    self._residues = csb.core.ReadOnlyCollectionContainer(items=wrapped,
                                            type=TargetResidue, start_index=1)

    if segments is not None:
        # index segments by their start position for O(1) lookup on assign()
        segments = dict((s.start, s) for s in segments)
    self._segments = csb.core.ReadOnlyDictionaryContainer(items=segments)
420
421 @staticmethod
423 """
424 Factory, which builds L{Target} objects from a bare sequence.
425
426 @param sequence: target's sequence
427 @type sequence: L{csb.bio.sequence.AbstractSequence}, str or iterable
428
429 @rtype: L{Target}
430 """
431
432 if isinstance(sequence, csb.bio.sequence.Sequence):
433 sequence = sequence.sequence
434
435 residues = []
436
437 for rn, aa in enumerate(sequence, start=1):
438 residue = csb.bio.structure.ProteinResidue(rank=rn, type=aa)
439 residues.append(residue)
440
441 return Target(id, len(residues), residues)
442
443 @staticmethod
445 """
446 Factory, which builds L{Target} objects from an HMM profile.
447
448 @param hmm: target's HMM
449 @type hmm: L{csb.bio.hmm.ProfileHMM}
450
451 @rtype: L{Target}
452 """
453
454 residues = [ r.clone() for r in hmm.residues ]
455 return Target(hmm.id, hmm.layers.length, residues)
456
457 @staticmethod
462
463 @property
466
467 @property
470
471 @property
474
475 @property
478
479 @property
482
483 @property
486
487 @property
490
491 @property
494
495 @property
498
499 @property
502
503 @property
506
508 """
509 Add a new fragment match.
510 @param fragment: fragment to assign
511 @type fragment: L{Assignment}
512 """
513
514 if not 1 <= fragment.qstart <= fragment.qend <= len(self._residues):
515 raise ValueError("Fragment out of range")
516
517 self._assignments._append_item(fragment)
518
519 for rank in range(fragment.qstart, fragment.qend + 1):
520 ai = ResidueAssignmentInfo(fragment, rank)
521 self._residues[rank].assign(ai)
522
523 if fragment.segment is not None:
524 try:
525 self._segments[fragment.segment].assign(fragment)
526 except KeyError:
527 raise ValueError("Undefined segment starting at {0}".format(fragment.segment))
528
530 """
531 Assign a bunch of fragments at once.
532 @type fragments: iterable of L{Assignment}s
533 """
534 for frag in fragments:
535 self.assign(frag)
536
538 """
539 Filter the current fragment map using a L{FragmentCluster}.
540
541 @param threshold: cluster RMSD threshold (see L{FragmentCluster})
542 @type threshold: float
543 @param extend: pick extended alternatives where possible (default=False)
544 @type extend: bool
545
546 @return: a new target, containing only cluster centroids/reps
547 @rtype: L{Target}
548 """
549
550 target = self.clone()
551
552 for residue in self.residues:
553 rep = residue.filter(threshold=threshold, extend=extend)
554
555 if rep is not None:
556 target.assign(rep.centroid)
557
558 return target
559
561 """
562 @return: a deep copy of the target
563 @rtype: L{Target}
564 """
565
566 segments = [self.segments[start] for start in self.segments]
567 segments = [TargetSegment(s.start, s.end, s.count) for s in segments]
568
569 target = self._factory.target(self.id, self.length, [r.native for r in self.residues],
570 overlap=self._overlap, segments=segments)
571
572 return target
573
576 """
577 Wrapper around L{Target}'s native residues. Decorates them with additional,
578 fragment-related methods.
579
580 @type native_residue: L{csb.bio.structure.ProteinResidue}
581 """
582
584
585 self._type = native_residue.type
586 self._native = native_residue.clone()
587 self._assignments = csb.core.ReadOnlyCollectionContainer(type=ResidueAssignmentInfo)
588
589 @property
592
593 @property
596
597 @property
600
603
605 """
606 @return: the fragment with the lowest RMSD at this position in the L{Target}
607 @rtype: L{Assignment}
608 """
609
610 best = None
611
612 for ai in self.assignments:
613 a = ai.fragment
614 if a.length < FragmentCluster.MIN_LENGTH:
615 continue
616 if best is None or a.rmsd < best.rmsd:
617 best = a
618 elif a.rmsd == best.rmsd and a.length > best.length:
619 best = a
620
621 return best
622
624 """
625 Filter all fragments, covering this position in the L{Target} using a
626 L{FragmentCluster}.
627
628 @param method: one of the L{Metrics} members (default=L{Metrics.RMSD})
629 @type method: str
630 @param threshold: cluster RMSD threshold (see L{FragmentCluster})
631 @type threshold: float
632 @param extend: pick extended alternative where possible (default=False)
633 @type extend: bool
634
635 @return: cluster's representative (if converged) or None
636 @rtype: L{ClusterRep} or None
637 """
638
639 try:
640 nodes = []
641 for ai in self.assignments:
642 node = ClusterNode.create(ai.fragment, method, extend)
643 nodes.append(node)
644
645 cluster = FragmentCluster(nodes, threshold=threshold)
646
647 center = cluster.shrink(minitems=0)
648 if center.has_alternative:
649 center.exchange()
650
651 return center
652
653 except (ClusterExhaustedError, ClusterDivergingError):
654 return None
655
657 """
658 @return: the longest fragment, covering the current position
659 @rtype: L{Assignment}
660 """
661 best = None
662
663 for q in self.assignments:
664 if best is None or (q.fragment.length > best.length):
665 best = q.fragment
666
667 return best
668
670 """
671 @return: the residue-wise precision of the fragment library at the
672 current position (percentage).
673
674 @param threshold: true-positive RMSD cutoff (default=1.5)
675 @type threshold: float
676 @rtype: float
677 """
678
679 if self.assignments.length < 1:
680 return None
681 else:
682 positive = [a for a in self.assignments if a.fragment.rmsd <= threshold]
683 pos = len(positive) * 100.0 / self.assignments.length
684
685 return pos
686
688
690
691 self._start = start
692 self._end = end
693 self._count = count
694
695 self._assignments = csb.core.ReadOnlyCollectionContainer(type=Assignment)
696
697 @property
700
701 @property
704
705 @property
708
709 @property
712
713 @property
716
718 if fragment.segment != self.start:
719 raise ValueError('Segment origin mismatch: {0} vs {1}'.format(fragment.segment, self.start))
720 else:
721 self._assignments._append_item(fragment)
722
724
725 best = None
726
727 for a in self.assignments:
728 if a.length < FragmentCluster.MIN_LENGTH:
729 continue
730 if best is None or a.rmsd < best.rmsd:
731 best = a
732 elif a.rmsd == best.rmsd and a.length > best.length:
733 best = a
734
735 return best
736
738
739 try:
740 cluster = FragmentCluster(self.assignments, threshold=1.5,
741 connectedness=0.5, method=method)
742 centroid = cluster.shrink(minitems=1)
743 return centroid
744
745 except ClusterExhaustedError:
746 return None
747 finally:
748 del cluster
749
751
752 best = None
753
754 for q in self.assignments:
755 if best is None or (q.length > best.length):
756 best = q
757
758 return best
759
761
762 rmsds = []
763
764 for q in self.assignments:
765 for s in self.assignments:
766 if q is not s:
767 r = q.rmsd_to(s, min_overlap)
768 if r is not None:
769 rmsds.append(r)
770 else:
771 assert q.rmsd_to(s, 1) < 0.01
772
773 return rmsds
774
776
777 mdas = []
778
779 for q in self.assignments:
780 for s in self.assignments:
781 if q is not s:
782 m = q.mda_to(s, min_overlap)
783 if m is not None:
784 mdas.append(m)
785 return mdas
786
788
789 from csb.bio.hmm import RELATIVE_SA
790 from csb.bio.io.hhpred import ScoreUnits, HHProfileParser
791
def convert_sa(sa):
    # map each solvent-accessibility character to its relative SA value
    return numpy.array([RELATIVE_SA[code] for code in sa])
794
795 sources = {}
796 scores = []
797
798 for q in self.assignments:
799 for s in self.assignments:
800
801 if s.source_id not in sources:
802 hmm = HHProfileParser(os.path.join(profiles, s.source_id + '.hhm')).parse()
803 sources[s.source_id] = hmm.dssp_solvent
804
805 if q is not s:
806
807 common = q.overlap(s)
808 if len(common) >= min_overlap:
809
810 qsa = q.solvent_at(sources[q.source_id], min(common), max(common))
811 ssa = s.solvent_at(sources[s.source_id], min(common), max(common))
812
813 if '-' in qsa + ssa:
814 continue
815
816 qsa = convert_sa(qsa)
817 ssa = convert_sa(ssa)
818 assert len(qsa) == len(ssa)
819 sa_rmsd = numpy.sqrt(numpy.sum((qsa - ssa) ** 2) / float(len(qsa)))
820
821 scores.append(sa_rmsd)
822 return scores
823
825
826 from csb.bio.hmm import BACKGROUND
827 back = numpy.sqrt(numpy.array(BACKGROUND))
828
829 sources = {}
830 scores = []
831
832 for q in self.assignments:
833 for s in self.assignments:
834
835 if s.source_id not in sources:
836 # hmm = HHProfileParser(os.path.join(hmm_path, s.source_id + '.hhm')).parse(ScoreUnits.Probability)
837 sources[s.source_id] = csb.io.Pickle.load(open(os.path.join(profiles, s.source_id + '.pkl'), 'rb'))
838
839 if q is not s:
840
841 common = q.overlap(s)
842 if len(common) >= min_overlap:
843
844 qprof = q.profile_at(sources[q.source_id], min(common), max(common))
845 sprof = s.profile_at(sources[s.source_id], min(common), max(common))
846
847 #score = qhmm.emission_similarity(shmm)
848 assert len(qprof) == len(sprof)
849 dots = [ numpy.dot(qprof[i] / back, sprof[i] / back) for i in range(len(qprof)) ]
850 score = numpy.log(numpy.prod(dots))
851 if score is not None:
852 scores.append(score)
853 return scores
854
856
857 binsize = float(binsize)
858 bins = numpy.ceil(numpy.array(data) / binsize)
859
860 hist = dict.fromkeys(bins, 0)
861 for bin in bins:
862 hist[bin] += (1.0 / len(bins))
863
864 freq = numpy.array(hist.values())
865 return - numpy.sum(freq * numpy.log(freq))
866
871
876
878
879 rmsds = self.pairwise_rmsd()
880
881 if len(rmsds) < 1:
882 return None
883
884 return sum([1 for i in rmsds if i <= threshold]) / float(len(rmsds))
885
887
888 sa_rmsds = self.pairwise_sa_rmsd(profiles=profiles)
889
890 if len(sa_rmsds) < 1:
891 return None
892
893 return sum([1 for i in sa_rmsds if i <= threshold]) / float(len(sa_rmsds))
894
896
897 if self.assignments.length < 1:
898 return None
899
900 return sum([1 for i in self.assignments if i.rmsd <= threshold]) / float(self.assignments.length)
901
903
904 cons = self.rmsd_consistency()
905
906 if cons is None:
907 return 0
908 else:
909 return numpy.log10(self.count) * cons
910
912
914
915 if not assignment.qstart <= rank <= assignment.qend:
916 raise ValueError('Rank {0} is not matched by this assignment')
917
918 self._assignment = assignment
919 self._rank = rank
920 self._relrank = rank - assignment.qstart
921
922 @property
924 return self._assignment.backbone[self._relrank]
925
926 @property
929
931 """
932 Represents a match between a fragment and its target.
933
934 @param source: source structure (must have torsion angles precomputed)
935 @type source: L{csb.bio.structure.Chain}
936 @param start: start position in C{source} (rank)
937 @type start: int
938 @param end: end position in C{source} (rank)
939 @type end: int
940 @param id: fragment ID
941 @type id: str
942 @param qstart: start position in target (rank)
943 @type qstart: int
944 @param qend: end position in target (rank)
945 @type qend: int
946 @param probability: probability of assignment
947 @type probability: float
948 @param rmsd: RMSD of the fragment, compared to target's native structure
949 @type rmsd: float
950 """
951
def __init__(self, source, start, end, id, qstart, qend, probability, rmsd, tm_score=None,
             score=None, neff=None, segment=None, internal_id=None):
    """
    Capture an immutable snapshot (CA trace, torsion angles, sequence) of
    the region C{start..end} in C{source} and register the match against
    the target region C{qstart..qend}.
    """
    # torsion angles must have been precomputed on the source chain
    assert source.has_torsion

    region = source.subregion(start, end, clone=True)

    try:
        calpha = [residue.atoms['CA'].vector.copy() for residue in region.residues]
    except csb.core.ItemNotFoundError:
        # a missing CA atom anywhere in the region renders the fragment unusable
        raise csb.bio.structure.Broken3DStructureError()

    torsion = [residue.torsion.copy() for residue in region.residues]

    self._calpha = csb.core.ReadOnlyCollectionContainer(items=calpha, type=numpy.ndarray)
    self._torsion = torsion
    self._sequence = region.sequence

    # source ID = 4-letter PDB accession + chain identifier
    self._source_id = source.accession[:4] + source.id
    self._start = start
    self._end = end

    self._score = score
    self._neff = neff
    self._ss = None

    self._segment_start = segment
    self.internal_id = internal_id

    super(Assignment, self).__init__(id, qstart, qend, probability, rmsd, tm_score, None)

    # blank secondary structure spanning the whole fragment, until one is assigned
    self._ss = SecondaryStructure('-' * self.length)
982 @staticmethod
984 """
985 Create a new L{Assignment} given a source rosetta fragment.
986
987 @param fragment: rosetta fragment
988 @type fragment: L{RosettaFragment}
989 @param provider: PDB database provider
990 @type provider: L{StructureProvider}
991
992 @rtype: L{Assignment}
993 """
994 structure = provider.get(fragment.accession)
995 source = structure.chains[fragment.chain]
996 source.compute_torsion()
997
998 id = "{0}:{1}-{2}".format(fragment.source_id, fragment.start, fragment.end)
999
1000 return Assignment(source, fragment.start, fragment.end, id,
1001 fragment.qstart, fragment.qend, 0, 0)
1002
1003 @property
1006
1007 @property
1010
1011 @property
1014
1015 @property
1018
1019 @property
1022
1023 @property
1026
1027 @property
1030
1031 @property
1034
1035 @property
1038
1039 @property
1042 @secondary_structure.setter
1044
1045 if isinstance(value, csb.core.string):
1046 value = csb.bio.structure.SecondaryStructure(value)
1047 if len(str(value)) != self.length:#(value.end - value.start + 1) != self.length:
1048 raise ValueError("Invalid secondary structure length", len(str(value)), self.length )
1049
1050 self._ss = value
1051
1053 """
1054 Apply rotation/translation to fragment's coordinates in place.
1055 """
1056
1057 for ca in self.backbone:
1058 newca = numpy.dot(ca, numpy.transpose(rotation)) + translation
1059 for i in range(3):
1060 ca[i] = newca[i]
1061
1063
1064 if not (self.qstart <= qstart <= qend <= self.qend):
1065 raise ValueError('Region {0}..{1} is out of range {2.qstart}..{2.qend}'.format(qstart, qend, self))
1066
1068 """
1069 @return: True if the fragment is centered around position=C{rank}.
1070 @rtype: bool
1071 """
1072
1073 if self.qstart < rank < self.qend:
1074 if (rank - self.qstart + 1) > 0.4 * (self.qend - self.qstart + 1):
1075 return True
1076
1077 return False
1078
1080 """
1081 @return: the CA coordinates of the fragment at the specified subregion.
1082 @rtype: list
1083 """
1084
1085 self._check_range(qstart, qend)
1086
1087 relstart = qstart - self.qstart
1088 relend = qend - self.qstart + 1
1089
1090 return self.backbone[relstart : relend]
1091
1093 """
1094 @return: the torsion angles of the fragment at the specified subregion.
1095 @rtype: list
1096 """
1097
1098 self._check_range(qstart, qend)
1099
1100 relstart = qstart - self.qstart
1101 relend = qend - self.qstart + 1
1102
1103 return self.torsion[relstart : relend]
1104
1106
1107 self._check_range(qstart, qend)
1108
1109 relstart = qstart - self.qstart
1110 relend = qend - self.qstart + 1
1111
1112 return sa_string[relstart : relend]
1113
1115
1116 self._check_range(qstart, qend)
1117 start = qstart - self.qstart + 1
1118 end = qend - self.qstart + 1
1119
1120 return self.secondary_structure.scan(start, end, loose=True, cut=True)
1121
1123
1124 self._check_range(qstart, qend)
1125
1126 start = qstart - self.qstart + self.start
1127 end = qend - self.qstart + self.start
1128
1129 if hasattr(source, 'subregion'):
1130 return source.subregion(start, end)
1131 else:
1132 return source[start - 1 : end]
1133
1135
1136 self._check_range(qstart, qend)
1137
1138 start = qstart - self.qstart + self.start
1139 end = qend - self.qstart + self.start
1140
1141 return source.subregion(start, end)
1142
1144 """
1145 @type other: L{Assignment}
1146 @return: target positions, covered by both C{self} and C{other}
1147 @rtype: set of int
1148 """
1149
1150 qranks = set(range(self.qstart, self.qend + 1))
1151 sranks = set(range(other.qstart, other.qend + 1))
1152
1153 return qranks.intersection(sranks)
1154
1156 """
1157 @return: the CA RMSD between C{self} and C{other}.
1158
1159 @param other: another fragment
1160 @type other: L{Assignment}
1161 @param min_overlap: require at least that number of overlapping residues
1162 (return None if not satisfied)
1163 @type min_overlap: int
1164
1165 @rtype: float
1166 """
1167
1168 common = self.overlap(other)
1169
1170 if len(common) >= min_overlap:
1171
1172 qstart, qend = min(common), max(common)
1173
1174 q = self.backbone_at(qstart, qend)
1175 s = other.backbone_at(qstart, qend)
1176
1177 if len(q) > 0 and len(s) > 0:
1178 return csb.bio.utils.rmsd(numpy.array(q), numpy.array(s))
1179
1180 return None
1181
1183
1184 common = self.overlap(other)
1185
1186 if len(common) >= min_overlap:
1187
1188 qstart, qend = min(common), max(common)
1189
1190 q = self.backbone_at(qstart, qend)
1191 s = other.backbone_at(qstart, qend)
1192
1193 if len(q) > 0 and len(s) > 0:
1194 return csb.bio.utils.rmsd(q, s) / RANDOM_RMSD[ len(common) ]
1195
1196 return None
1197
1199
1200 common = self.overlap(other)
1201
1202 if len(common) >= min_overlap:
1203
1204 qstart, qend = min(common), max(common)
1205
1206 q = self.torsion_at(qstart, qend)
1207 s = other.torsion_at(qstart, qend)
1208
1209 if len(q) > 0 and len(s) > 0:
1210
1211 maxphi = max(numpy.abs(i.phi - j.phi) for i, j in zip(q, s)[1:]) # phi: 2 .. L
1212 maxpsi = max(numpy.abs(i.psi - j.psi) for i, j in zip(q, s)[:-1]) # psi: 1 .. L-1
1213
1214 return max(maxphi, maxpsi)
1215
1216 return None
1217
1219 """
1220 @deprecated: this method will be deleted soon. Use
1221 L{csb.bio.fragments.rosetta.OutputBuilder} instead.
1222 """
1223 stream = csb.io.MemoryStream()
1224
1225 if weight is None:
1226 weight = self.probability
1227 if not qstart:
1228 qstart = self.qstart
1229 if not qend:
1230 qend = self.qend
1231
1232 source.compute_torsion()
1233 chain = self.chain_at(source, qstart, qend)
1234
1235 for i, r in enumerate(chain.residues):
1236
1237 acc = self.source_id[:4]
1238 ch = self.source_id[4].upper()
1239
1240 start = qstart - self.qstart + self.start + i
1241 aa = r.type
1242 ss = 'L'
1243 phi, psi, omega = 0, 0, 0
1244 if r.torsion.phi:
1245 phi = r.torsion.phi
1246 if r.torsion.psi:
1247 psi = r.torsion.psi
1248 if r.torsion.omega:
1249 omega = r.torsion.omega
1250
1251 stream.write(' {0:4} {1:1} {2:>5} {3!s:1} {4!s:1} {5:>8.3f} {6:>8.3f} {7:>8.3f} {8:>8.3f}\n'.format(acc, ch, start, aa, ss, phi, psi, omega, weight))
1252
1253 return stream.getvalue()
1254
1257
1260
1263
1265 """
1266 Provides clustering/filtering of the fragments, covering a common residue
1267 in the target. Clustering is done via iterative shrinking of the cluster.
1268 At each iteration, node rejection (deletion) is attempted for each node. The
1269 node rejection, causing the most significant drop in the average pairwise
1270 distance (RMSD) in the cluster, is retained. This procedure is repeated
1271 until: 1) the average pairwise RMSD drops below the C{threshold} (converged),
1272 2) the cluster gets exhausted or 3) node rejection no longer
1273 causes a drop in the average distance (not converging).
1274
1275 @param items: cluster members
1276 @type items: iterable of L{ClusterNode}s
1277 @param threshold: RMSD threshold; continue shrinking until the mean distance
1278 drops below this value (default=1.5)
1279 @type threshold: float
1280 @param connectedness: use only nodes which are connected to at least c% of all
1281 initial nodes (default=0.5, that means 50%)
1282 @type connectedness: float
1283 """
1284
1285 MIN_LENGTH = 6
1286
1288
1289 items = set(i for i in items if i.fragment.length >= FragmentCluster.MIN_LENGTH)
1290
1291 self._matrix = {}
1292 self._threshold = float(threshold)
1293 self._connectedness = float(connectedness)
1294
1295 for i in items:
1296
1297 self._matrix[i] = {}
1298 conn = 0.0
1299
1300 for j in items:
1301 distance = i.distance(j)
1302 if distance is not None:
1303 conn += 1
1304 self._matrix[i][j] = distance
1305
1306 if conn / len(items) < self.connectedness:
1307 # reject i as a first class node
1308 del self._matrix[i]
1309
1310 self._items = set(self._matrix.keys())
1311
1312 if len(self._items) < 1:
1313 raise ClusterEmptyError()
1314
1315 self._initcount = self.count
1316
1317 @property
1320
1321 @property
1324
1325 @property
1327 return tuple(i.fragment for i in self._items)
1328
1329 @property
1332 @threshold.setter
1334 self._threshold = float(value)
1335
1336 @property
1339
1341
1342 d = []
1343
1344 for i in self._matrix:
1345 if skip is i:
1346 continue
1347
1348 for j in self._matrix[i]:
1349 if skip is not j:
1350 d.append(self._matrix[i][j])
1351
1352 return d
1353
1360
1362 """
1363 @return: the current mean distance in the cluster
1364 @rtype: float
1365 """
1366
1367 d = self._distances(skip=skip)
1368
1369 if len(d) > 0:
1370 return numpy.mean(d)
1371 else:
1372 raise ClusterExhaustedError()
1373
1375 """
1376 @return: the current representative fragment
1377 @rtype: L{ClusterRep}
1378
1379 @note: the cluster rep is the node with the lowest average distance
1380 to all other nodes. If a fixed fragment exists, structurally similar
1381 to the rep, but longer, this fragment may be suggested as an alternative
1382 (see also L{ClusterRep}).
1383 """
1384
1385 alt = None
1386 cen = None
1387 avg = None
1388
1389 for i in self._matrix:
1390
1391 curravg = numpy.mean(list(self._matrix[i].values()))
1392
1393 if avg is None or curravg < avg:
1394 avg = curravg
1395 cen = i
1396 elif curravg == avg:
1397 if i.fragment.length > cen.fragment.length:
1398 cen = i
1399
1400 d = self._distances()
1401 mean = numpy.mean(d)
1402 cons = sum(1.0 for i in d if i <= self.threshold) / len(d)
1403
1404 for i in self._matrix:
1405 if i is not cen and i.fixed and i.fragment.length > cen.fragment.length:
1406 distance = self._distance(i, cen)
1407 if distance is not None and distance < 0.5 * self.threshold:
1408 if alt is None or alt.fragment.length < i.fragment.length:
1409 alt = i
1410
1411 return ClusterRep(cen, mean, cons, len(self._matrix[cen]), alternative=alt,
1412 rejections=(self._initcount - self.count))
1413
1415 """
1416 Remove C{item} from the cluster.
1417
1418 @type item: L{ClusterNode}
1419 @raise ClusterExhaustedError: if this is the last remaining item
1420 """
1421
1422 if self.count == 1:
1423 raise ClusterExhaustedError()
1424
1425 assert not item.fixed
1426
1427 for i in self._matrix:
1428 if item in self._matrix[i]:
1429 del self._matrix[i][item]
1430
1431 del self._matrix[item]
1432 self._items.remove(item)
1433
1435 """
1436 Shrink the cluster by a single node.
1437
1438 @return: True on successful shrink, False otherwise (e.g. if
1439 already converged)
1440 @rtype: bool
1441 @raise ClusterExhaustedError: if exhausted
1442 @raise ClusterDivergingError: if not converging
1443 """
1444
1445 mean = self.mean()
1446 if mean <= self.threshold or self.count == 1:
1447 return False # already shrunk enough
1448
1449 m = {}
1450
1451 for i in self._matrix:
1452 if not i.fixed:
1453 newmean = self.mean(skip=i)
1454 m[newmean] = i
1455
1456 if len(m) == 0: # only fixed items remaining
1457 raise ClusterExhaustedError()
1458
1459 newmean = min(m)
1460
1461 if newmean > mean:
1462 raise ClusterDivergingError() # can't converge, usually when fixed items are too far away from the average
1463 elif newmean < mean:
1464 junk = m[newmean]
1465 self.reject(junk)
1466 return True # successful shrink
1467 else:
1468 return False # converged
1469
1471 """
1472 Start automatic shrinking.
1473
1474 @param minitems: absolute minimum of the number of nodes in the cluster
1475 @type minitems: int
1476
1477 @return: cluster's representative: the node with the lowest average
1478 distance to all other nodes in the cluster
1479 @rtype: L{ClusterRep}
1480
1481 @raise ClusterExhaustedError: if C{self.count} < C{minitems} and
1482 still not converged
1483 """
1484
1485 if self.count > minitems:
1486
1487 while self.shrinkone():
1488 if self.count <= minitems:
1489 raise ClusterExhaustedError()
1490 else:
1491 raise ClusterExhaustedError()
1492
1493 return self.centroid()
1494
1496 """
1497 Cluster node.
1498
1499 @param fragment: fragment
1500 @type fragment: L{Assignment}
1501 @param distance: distance metric (a L{Metrics} member, default is RMSD)
1502 @type distance: str
1503 @param fixed: mark this node as fixed (cannot be rejected)
1504 @type fixed: bool
1505 """
1506
1507 FIXED = 0.7
1508
1509 @staticmethod
1511 """
1512 Create a new L{ClusterNode} given a specified C{Assignment}. If this
1513 assignment is a high probability match, define it as a fixed fragment.
1514
1515 @rtype: L{ClusterNode}
1516 """
1517 if fragment.probability > ClusterNode.FIXED and fragment.length >= FragmentCluster.MIN_LENGTH:
1518 return ClusterNode(fragment, distance=method, fixed=extend)
1519 else:
1520 return ClusterNode(fragment, distance=method, fixed=False)
1521
1523
1524 if fixed and fragment.length < FragmentCluster.MIN_LENGTH:
1525 raise ValueError("Can't fix a short fragment")
1526
1527 self.fragment = fragment
1528 self.fixed = bool(fixed)
1529
1530 self._distance = getattr(self.fragment, distance)
1531
1533 """
1534 @return: the distance between self and another node
1535 @type other: L{ClusterNode}
1536 @rtype: float
1537 """
1538 return self._distance(other.fragment)
1539
1541 """
1542 Cluster's representative (centroid) node. This object carries the
1543 result of shrinking itself.
1544
1545 @param centroid: rep node
1546 @type centroid: L{ClusterNode}
1547 @param mean: current mean distance in the cluster
1548 @type mean: float
1549 @param consistency: percentage of pairwise distances below the RMSD C{threshold}
1550 @type consistency: float
1551 @param count: current number of nodes in the cluster
1552 @type count: int
1553 @param rejections: total number of rejections
1554 @type rejections: int
1555 @param alternative: suggested cluster rep alternative (e.g. structurally
1556 similar to the centroid, but longer)
1557 @type alternative: L{ClusterNode}
1558 """
1559
1561
1562 if isinstance(centroid, ClusterNode):
1563 centroid = centroid.fragment
1564 if isinstance(alternative, ClusterNode):
1565 alternative = alternative.fragment
1566
1567 self._centroid = centroid
1568 self._alternative = alternative
1569 self._mean = mean
1570 self._consistency = consistency
1571 self._count = count
1572 self._rejections = rejections
1573
1574 @property
1576 """
1577 Confidence of assignment: log10(count) * consistency
1578 """
1579 if self.count <= 0 or self.count is None or self.consistency is None:
1580 return 0
1581 else:
1582 return numpy.log10(self.count) * self.consistency
1583
1584 @property
1587
1588 @property
1591
1592 @property
1595
1596 @property
1599
1600 @property
1603
1604 @property
1607
1608 @property
1611
1613 """
1614 If an alternative is available, swap the centroid and the alternative.
1615 """
1616
1617 if self._alternative is not None:
1618
1619 centroid = self._centroid
1620 self._centroid = self._alternative
1621 self._alternative = centroid
1622
1624 """
1625 @deprecated: this method is obsolete and will be deleted soon
1626 """
1627 return self.centroid.to_rosetta(source, weight=self.confidence)
1628
1630
1631 @staticmethod
1633
1634 if center.centroid.qstart <= (start - overhang):
1635 start -= overhang
1636 elif center.centroid.qstart < start:
1637 start = center.centroid.qstart
1638
1639 if center.centroid.qend >= (end + overhang):
1640 end += overhang
1641 elif center.centroid.qend > end:
1642 end = center.centroid.end
1643
1644 return AdaptedAssignment(center, start, end)
1645
1647
1648 if qstart < center.centroid.qstart:
1649 raise ValueError(qstart)
1650 if qend > center.centroid.qend:
1651 raise ValueError(qend)
1652
1653 self._qstart = qstart
1654 self._qend = qend
1655 self._center = center
1656
1657 @property
1659 return self._center.centroid
1660
1661 @property
1664
1665 @property
1667 return self._center.confidence
1668
1669 @property
1672
1673 @property
1676
1677 @property
1680
1683
1686
1688
1690
1691 if not length > 0:
1692 raise ValueError(length)
1693
1694 self._length = int(length)
1695 self._slots = set(range(1, self._length + 1))
1696 self._map = {}
1697
1698 centers = list(centroids)
1699 centers.sort(key=lambda i: i.confidence, reverse=True)
1700
1701 for c in centers:
1702 self.assign(c)
1703
1704 @property
1707
1709
1710 for r in range(center.centroid.qstart, center.centroid.qend + 1):
1711 if r in self._slots:
1712 self._map[r] = center
1713 self._slots.remove(r)
1714
1716
1717 center = None
1718 start = None
1719 end = None
1720
1721 for r in range(1, self._length + 1):
1722
1723 if center is None:
1724 if r in self._map:
1725 center = self._map[r]
1726 start = end = r
1727 else:
1728 center = None
1729 start = end = None
1730 else:
1731 if r in self._map:
1732 if self._map[r] is center:
1733 end = r
1734 else:
1735 yield AdaptedAssignment(center, start, end)
1736 center = self._map[r]
1737 start = end = r
1738 else:
1739 yield AdaptedAssignment(center, start, end)
1740 center = None
1741 start = end = None
1742
1745
1747
1748 self.rank = rank
1749 self.confidence = confidence
1750 self.confident = confident
1751 self.count = count
1752 self.rep = rep
1753
1755 """
1756 Simplifies the construction of fragment libraries.
1757 """
1758
1762
1764 """
1765 Build a fragment library given a L{Target} and its L{Assignment}s.
1766
1767 @param target: target protein
1768 @type target: L{Target}
1769
1770 @rtype: L{RosettaFragmentMap}
1771 """
1772
1773 frag_factory = self.rosetta.RosettaFragment
1774 fragments = list(map(frag_factory.from_object, target.matches))
1775 #fragments = [ frag_factory.from_object(f) for f in target.matches if f.length >= 6 ]
1776 fragments.sort()
1777
1778 return self.rosetta.RosettaFragmentMap(fragments, target.length)
1779
1781 """
1782 Build a fixed-length fragment library from a list of
1783 variable-length L{Assignment}s.
1784
1785 @param fragments: source fragments
1786 @type fragments: iterable of L{RosettaFragment}s
1787 @param window: fixed-length fragment size (for classic Rosetta: choose 9)
1788 @type window: int
1789
1790 @return: fixed-length fragment library
1791 @rtype: L{RosettaFragmentMap}
1792 """
1793
1794 frags = []
1795
1796 for f in fragments:
1797 for qs in range(f.qstart, f.qend - window + 1):
1798 frags.append(f.subregion(qs, qs + window - 1))
1799
1800 return self.rosetta.RosettaFragmentMap(frags)
1801
1803 """
1804 Complement C{target}'s assignments with C{filling} (e.g. rosetta fragments).
1805 The regions to be complemented are determined by calculating the confidence
1806 at each residue (by filtering).
1807
1808
1809 @param target: target protein
1810 @type target: L{Target}
1811 @param filling: additional fragments to place in the low-conf regions
1812 @type filling: L{RosettaFragmentMap} or iterable of L{RosettaFragment}
1813 @param threshold: confidence threshold
1814 @type threshold: float
1815
1816 @return: complemented fragment library
1817 @rtype: L{RosettaFragmentMap}
1818 """
1819
1820 fragmap = self.make_fragset(target)
1821 covered = set()
1822
1823 for r in target.residues:
1824
1825 if r.assignments.length == 0:
1826 if callback:
1827 callback(ResidueEventInfo(r.native.rank, None, 0, False))
1828 continue
1829
1830 cluster = r.filter()
1831 if cluster is None:
1832 if callback:
1833 callback(ResidueEventInfo(r.native.rank, 0, 0, False))
1834 continue
1835
1836 if cluster.confidence >= threshold:
1837 covered.add(r.native.rank)
1838 elif callback:
1839 callback(ResidueEventInfo(r.native.rank, cluster.confidence, cluster.count, False))
1840
1841 for r in target.residues:
1842 if r.native.rank not in covered: # true for gaps and low-conf residues
1843 fragmap.mark_unconfident(r.native.rank)
1844
1845 for frag in filling:
1846 fragmap.complement(frag)
1847
1848 return fragmap
1849
1851 """
1852 Build a filtered fragment library (by clustering), containing only
1853 representative fragments (cluster centroids).
1854
1855 @param target: target protein
1856 @type target: L{Target}
1857 @param extend: if True, pick alternative reps if available
1858 @type extend: bool
1859
1860 @return: filtered fragment library
1861 @rtype: L{RosettaFragmentMap}
1862 """
1863
1864 fragments = []
1865
1866 for r in target.residues:
1867 if r.assignments.length == 0:
1868 continue
1869
1870 cluster = r.filter(extend=extend)
1871 if cluster is None:
1872 continue
1873
1874 if extend and cluster.has_alternative:
1875 best = cluster.alternative
1876 else:
1877 best = cluster.centroid
1878
1879 fragment = self.rosetta.RosettaFragment.from_object(best)
1880 fragments.append(fragment)
1881 if callback:
1882 callback(ResidueEventInfo(r.native.rank, cluster.confidence, cluster.count, rep=cluster.centroid))
1883
1884 fragments.sort()
1885 return self.rosetta.RosettaFragmentMap(fragments, target.length)
1886
1888 """
1889 Mix fragments from multiple libraries.
1890
1891 @type fragsets: L{RosettaFragmentMap}
1892 @return: mixed fragment library
1893 @rtype: L{RosettaFragmentMap}
1894 """
1895
1896 fragments = []
1897 length = 0
1898
1899 for fragset in fragsets:
1900 if fragset._length > length:
1901 length = fragset._length
1902
1903 for fragment in fragset:
1904 fragments.append(fragment)
1905
1906 return self.rosetta.RosettaFragmentMap(fragments, length)
1907
1910
1912
1913 FACTORY = None
1914 DSN = None
1915
1917
1918 self.factory = factory or self.__class__.FACTORY
1919 self.cs = dsn or self.__class__.DSN
1920 self.connection = None
1921 self.cursor = None
1922
1924
1925 self.connection = self.factory(self.cs)
1926 try:
1927 self.cursor = self.connection.cursor()
1928 except:
1929 self.connection.close()
1930 raise
1931 return self
1932
1940
1942
1943 self._pdb = pdb_paths
1944 self._connection = None
1945
1946 from csb.bio.io.wwpdb import find, StructureParser
1947 self._parser = StructureParser
1948 self._find = find
1949 self._factory = factory
1950
1951 try:
1952 import psycopg2.extras
1953 except ImportError:
1954 raise RuntimeError('Please install the psycopg2 module first')
1955
1956 if connection_string is None:
1957 connection_string = self.connection_string()
1958
1959 BenchmarkAdapter.Connection.FACTORY = psycopg2.extras.DictConnection
1960 BenchmarkAdapter.Connection.DSN = connection_string
1961
1962 @staticmethod
1964
1965 fields = ['dbname={0}'.format(database)]
1966
1967 if host:
1968 fields.append('host={0}'.format(host))
1969 if username:
1970 fields.append('user={0}'.format(username))
1971 fields.append('password={0}'.format(password))
1972
1973 return ' '.join(fields)
1974
1976
1977 with BenchmarkAdapter.Connection() as db:
1978
1979 db.cursor.callproc('reporting."GetTargets"', (benchmark_id,))
1980 return db.cursor.fetchall()
1981
1983
1984 with BenchmarkAdapter.Connection() as db:
1985
1986 db.cursor.callproc('reporting."GetTargetDetails"', (target_id,))
1987 return db.cursor.fetchall()
1988
1990
1991 with BenchmarkAdapter.Connection() as db:
1992
1993 db.cursor.callproc('reporting."GetAssignments"', (target_id, type))
1994 return db.cursor.fetchall()
1995
1997
1998 with BenchmarkAdapter.Connection() as db:
1999
2000 db.cursor.callproc('reporting."GetTargetSecStructureAssignments2"', (target_id, type))
2001 return db.cursor.fetchall()
2002
2004
2005 with BenchmarkAdapter.Connection() as db:
2006
2007 db.cursor.callproc('reporting."GetScores"', (benchmark_id, type))
2008 return db.cursor.fetchall()
2009
2011
2012 with BenchmarkAdapter.Connection() as db:
2013
2014 db.cursor.callproc('reporting."GetCentroids"', (benchmark_id,))
2015 return db.cursor.fetchall()
2016
2018
2019 with BenchmarkAdapter.Connection() as db:
2020
2021 db.cursor.callproc('reporting."GetTargetSegments"', (target_id,))
2022 data = db.cursor.fetchall()
2023
2024 return [ TargetSegment(row['Start'], row['End'], row['Count']) for row in data ]
2025
2027
2028 pdbfile = self._find(accession, self._pdb)
2029
2030 if not pdbfile and chain:
2031 pdbfile = self._find(accession + chain, self._pdb)
2032
2033 if not pdbfile:
2034 raise IOError('{0} not found here: {1}'.format(accession, self._pdb))
2035
2036 return self._parser(pdbfile).parse_structure()
2037
2039
2040 info = self.target_details(target_id)
2041 if not info:
2042 raise ValueError('No such Target ID in the database: {0}'.format(target_id))
2043 row = info[0]
2044
2045 id = row["Accession"]
2046 length = float(row["Length"])
2047 overlap = float(row["MaxOverlap"]) / (length or 1.)
2048
2049 native = self.structure(id[:4], id[4]).chains[id[4]]
2050 segments = self.target_segments(target_id)
2051 target = self._factory.target(id, length, native.residues, overlap, segments)
2052
2053 source = None
2054
2055 for row in self.assignments(target_id, type):
2056
2057 src_accession = row['Source'][:4]
2058 src_chain = row['Source'][4]
2059
2060 if source is None or source.accession != src_accession:
2061 try:
2062 source = self.structure(src_accession, src_chain)
2063 except (IOError, ValueError) as ex:
2064 target.errors.append(ex)
2065 continue
2066
2067 if src_chain == '_':
2068 frag_chain = source.first_chain
2069 else:
2070 frag_chain = source.chains[src_chain]
2071 if not frag_chain.has_torsion:
2072 frag_chain.compute_torsion()
2073
2074 fragment = self._factory.assignment(
2075 source=frag_chain,
2076 start=row['SourceStart'],
2077 end=row['SourceEnd'],
2078 id=row['FragmentName'],
2079 qstart=row['Start'],
2080 qend=row['End'],
2081 probability=row['Probability'],
2082 score=row['Score'],
2083 neff=row['Neff'],
2084 rmsd=row['RMSD'],
2085 tm_score=row['TMScore'],
2086 segment=row['SegmentStart'],
2087 internal_id=row['InternalID'])
2088
2089 target.assign(fragment)
2090
2091 if ss:
2092 self._attach_sec_structure(target, target_id, type)
2093
2094 return target
2095
2097
2098 ss = {}
2099
2100 for row in self.assignments_sec_structure(target_id, type):
2101 frag_id, state = row["AssignmentID"], row["DSSP"]
2102 if row[frag_id] not in ss:
2103 ss[frag_id] = []
2104
2105 ss[frag_id].append(state)
2106
2107 for a in target.matches:
2108 if a.internal_id in ss:
2109 dssp = ''.join(ss[a.internal_id])
2110 a.secondary_structure = dssp
2111
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Fri Jul 5 14:24:54 2013 | http://epydoc.sourceforge.net |