Skip to content

Commit b343c0d

Browse files
committed
Autogenerated literator docs for v0.1.0
1 parent 7d8216c commit b343c0d

File tree

5 files changed

+435
-0
lines changed

5 files changed

+435
-0
lines changed

docs/src/main/scala/entry.scala.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
2+
```scala
3+
package com.bio4j.data.enzyme
4+
5+
/** Read-only interface exposing the fields of a single enzyme entry. */
trait AnyEntry extends Any {

  /** Identifier of this entry. */
  def ID: String

  /** Identifier of the sub-subclass this entry is filed under. */
  def subSubClassID: String

  /** Description text of the entry. */
  def description: String

  /** Alternative names of the entry; may be empty. */
  def alternativeNames: Seq[String]

  /** Cofactors listed for the entry; may be empty. */
  def cofactors: Seq[String]

  /** Catalytic activity text of the entry. */
  def catalyticActivity: String

  /** Free-text comments attached to the entry; may be empty. */
  def comments: Seq[String]
}
16+
17+
/** Closed hierarchy of the three levels of enzyme classification.
  *
  * Every level carries an identifier and a human-readable description; the
  * level itself is encoded by the concrete case class.
  */
sealed trait EnzymeClasses extends Any {

  /** Identifier of the class, subclass or sub-subclass. */
  def ID: String

  /** Description text of the class, subclass or sub-subclass. */
  def description: String
}

// Case-class parameters are already public vals, so the explicit `val` is dropped;
// the classes are final because they are the leaves of a sealed hierarchy.

/** Top-level enzyme class. */
final case class EnzymeClass(ID: String, description: String) extends EnzymeClasses

/** Enzyme subclass. */
final case class EnzymeSubClass(ID: String, description: String) extends EnzymeClasses

/** Enzyme sub-subclass. */
final case class EnzymeSubSubClass(ID: String, description: String) extends EnzymeClasses
25+
26+
```
27+
28+
29+
30+
31+
[test/scala/EnzymeEntries.scala]: ../../test/scala/EnzymeEntries.scala.md
32+
[test/scala/EnzymeClasses.scala]: ../../test/scala/EnzymeClasses.scala.md
33+
[main/scala/entry.scala]: entry.scala.md
34+
[main/scala/flat/entry.scala]: flat/entry.scala.md
35+
[main/scala/flat/classes.scala]: flat/classes.scala.md
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
2+
```scala
3+
package com.bio4j.data.enzyme.flat
4+
5+
import com.bio4j.data.enzyme._
6+
7+
case class ClassLine(val line: String) {
8+
9+
/** Builds the enzyme class, subclass or sub-subclass described by this line.
  *
  * Throws a `MatchError` when the identifier matches none of the three levels.
  */
final def asEnzymeClass: EnzymeClasses = {
  val id = ID

  // The order of the checks matters: for a class-level identifier the missing
  // fragments are "-", so the subclass and sub-subclass identifiers computed
  // from the same fragments are equal to it as well. The coarsest level wins.
  if (id == classID) EnzymeClass(id, description)
  else if (id == subClassID) EnzymeSubClass(id, description)
  else if (id == subSubClassID) EnzymeSubSubClass(id, description)
  else throw new MatchError(id)
}
18+
```
19+
20+
21+
In the `enzclass.txt` source file the ID always occupies the first 9 characters of a line, but it is padded with stray spaces, which we strip out.
22+
23+
24+
```scala
25+
// Identifier of the line: its first 9 characters with every space removed.
private lazy val ID: String =
  line.take(9).replace(" ", "")
29+
30+
// The first four dot-separated fragments of the identifier.
// Throws an ArrayIndexOutOfBoundsException if the identifier has fewer than four.
private lazy val IDFragments: (String,String,String,String) = {
  val parts = ID.split('.')

  (parts(0), parts(1), parts(2), parts(3))
}
36+
37+
// Class-level identifier: the first fragment followed by three "-" placeholders.
private def classID: String =
  IDFragments._1 + ".-.-.-"
39+
40+
// Subclass-level identifier: the first two fragments followed by two "-" placeholders.
private def subClassID: String = {
  val (first, second, _, _) = IDFragments

  s"${first}.${second}.-.-"
}
42+
43+
// Sub-subclass-level identifier: the first three fragments followed by one "-" placeholder.
private def subSubClassID: String = {
  val (first, second, third, _) = IDFragments

  Seq(first, second, third, "-").mkString(".")
}
45+
```
46+
47+
48+
We don't want to store the description with a dot at the end!
49+
50+
51+
```scala
52+
// Description of the line: everything after the 9-character identifier column,
// trimmed and without the trailing dot.
private lazy val description: String = {
  val text = line.drop(9).trim

  text.stripSuffix(".")
}
57+
}
58+
59+
case object enzymeClasses {
60+
```
61+
62+
63+
The Enzyme source file `enzclass.txt` starts with:
64+
65+
```
66+
---------------------------------------------------------------------------
67+
ENZYME nomenclature database
68+
SIB Swiss Institute of Bioinformatics; Geneva, Switzerland
69+
----------------------------------------------------------------------------
70+
71+
Description: Definition of enzyme classes, subclasses and sub-subclasses
72+
Name: enzclass.txt
73+
Release: 07-Sep-2016
74+
75+
----------------------------------------------------------------------------
76+
77+
1. -. -.- Oxidoreductases.
78+
1. 1. -.- Acting on the CH-OH group of donors.
79+
```
80+
81+
it also ends with:
82+
83+
```
84+
----------------------------------------------------------------------------
85+
Copyrighted by the SIB Swiss Institute of Bioinformatics.
86+
There are no restrictions on its use by any institutions as long as
87+
its content is in no way modified.
88+
----------------------------------------------------------------------------
89+
```
90+
91+
so we only pick the lines whose second character is a dot.
92+
93+
Note that there are empty lines now and then, which need to be filtered out too.
94+
95+
96+
```scala
97+
/** Parses the class, subclass and sub-subclass lines out of the given lines.
  *
  * Only lines whose second character is a dot are parsed; everything else
  * (header, footer, empty lines) is skipped.
  *
  * @param lines the raw lines of the source file
  * @return a lazy iterator over the parsed enzyme classes
  */
def fromLines(lines: Iterator[String]): Iterator[EnzymeClasses] =
  lines.collect {
    // The length check covers empty lines and also one-character lines,
    // for which `line(1)` would otherwise throw.
    case line if line.length > 1 && line(1) == '.' => ClassLine(line).asEnzymeClass
  }
101+
}
102+
103+
```
104+
105+
106+
107+
108+
[test/scala/EnzymeEntries.scala]: ../../../test/scala/EnzymeEntries.scala.md
109+
[test/scala/EnzymeClasses.scala]: ../../../test/scala/EnzymeClasses.scala.md
110+
[main/scala/entry.scala]: ../entry.scala.md
111+
[main/scala/flat/entry.scala]: entry.scala.md
112+
[main/scala/flat/classes.scala]: classes.scala.md
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
2+
```scala
3+
package com.bio4j.data.enzyme.flat
4+
5+
import com.bio4j.data.enzyme._
6+
7+
/** A single entry, backed by its raw lines.
  *
  * Each field is read from the lines starting with the matching two-letter
  * prefix (`ID`, `DE`, `AN`, `CF`, `CA`, `CC`); the prefix is removed and the
  * remainder trimmed before it is handed to the corresponding line parser.
  */
case class Entry(val lines: Seq[String]) extends AnyEntry {

  def ID: String = idLine.value

  def subSubClassID: String = idLine.subSubClassID

  def description: String = deLines.description

  def alternativeNames: Seq[String] = anLines.alternativeNames

  def cofactors: Seq[String] = cfLines.cofactors

  def catalyticActivity: String = caLines.catalyticActivity

  def comments: Seq[String] = ccLines.comments

  // Only the first ID line is used; `head` throws if the entry has none.
  // `new` keeps this referring to the `ID` wrapper class rather than the `ID` member above.
  private lazy val idLine: ID = new ID(linesWith(prefix = "ID").head)

  private lazy val deLines: DE = DE(linesWith(prefix = "DE"))

  private lazy val anLines: AN = AN(linesWith(prefix = "AN"))

  private lazy val cfLines: CF = CF(linesWith(prefix = "CF"))

  private lazy val caLines: CA = CA(linesWith(prefix = "CA"))

  private lazy val ccLines: CC = CC(linesWith(prefix = "CC"))

  // Content of every line starting with `prefix`, without the prefix and trimmed.
  private def linesWith(prefix: String): Seq[String] =
    lines
      .filter(_.startsWith(prefix))
      .map(_.stripPrefix(prefix).trim)
}
51+
52+
/** Wrapper around the content of an ID line. */
private case class ID(val value: String) extends AnyVal {

  /** The identifier with everything after its last dot replaced by "-".
    *
    * If the value contains no dot at all the result is just "-".
    */
  def subSubClassID: String = {
    val upToLastDot = value.take(value.lastIndexOf('.') + 1)

    upToLastDot + "-"
  }
}
57+
58+
/** Wrapper around the content of the DE lines of an entry. */
private case class DE(val lines: Seq[String]) extends AnyVal {

  /** All lines, each trimmed and stripped of one trailing dot, joined with single spaces. */
  def description: String = {
    val cleaned = for (l <- lines) yield l.trim.stripSuffix(".")

    cleaned.mkString(" ")
  }
}
65+
66+
/** Wrapper around the content of the AN lines of an entry. */
private case class AN(val lines: Seq[String]) extends AnyVal {

  /** The alternative names found in the AN lines.
    *
    * The lines are joined with a space and split on '.'; each piece is trimmed
    * and empty pieces are dropped, so an entry without AN lines yields an empty
    * sequence instead of a single empty name.
    *
    * NOTE(review): a name that itself contains a dot is still split in two.
    */
  def alternativeNames: Seq[String] =
    lines
      .mkString(" ")
      .split('.')
      .map(_.trim)
      .filter(_.nonEmpty)
      .toList
}
73+
74+
/** Wrapper around the content of the CF lines of an entry. */
private case class CF(val lines: Seq[String]) extends AnyVal {

  /** The cofactors found in the CF lines.
    *
    * The lines are concatenated and split on ';'; each piece is trimmed and
    * stripped of one trailing dot. Empty pieces are dropped, so an entry
    * without CF lines yields an empty sequence instead of a single empty name.
    */
  def cofactors: Seq[String] =
    lines
      .mkString("")
      .split(';')
      .map(_.trim.stripSuffix("."))
      .filter(_.nonEmpty)
      .toList
}
82+
83+
/** Wrapper around the content of the CA lines of an entry. */
private case class CA(val lines: Seq[String]) extends AnyVal {

  /** All lines joined with single spaces; empty when there are no lines. */
  def catalyticActivity: String =
    lines.reduceOption(_ + " " + _).getOrElse("")
}
88+
89+
/** Wrapper around the content of the CC lines of an entry. */
private case class CC(val lines: Seq[String]) extends AnyVal {

  /** The individual comments found in the CC lines.
    *
    * The lines are joined with a space and split on the "-!-" marker; empty
    * pieces are skipped and the rest are trimmed and stripped of one trailing dot.
    */
  def comments: Seq[String] = {
    val pieces = lines.mkString(" ").split("-!-")

    for (piece <- pieces if piece.nonEmpty) yield piece.trim.stripSuffix(".")
  }
}
96+
97+
case object entries {
98+
```
99+
100+
101+
The ENZYME entries file has a "header" consisting of CC lines followed by an end-of-entry `//` line.
102+
103+
104+
```scala
105+
/** Parses all entries out of the given lines.
  *
  * The leading run of lines starting with "CC" or "//" is skipped before the
  * remaining lines are grouped into entries.
  */
def fromLines(lines: Seq[String]): Seq[Entry] = {
  val body = lines.dropWhile { l => l.startsWith("CC") || l.startsWith("//") }

  entryLines(body).map(Entry(_))
}
107+
108+
/** Same as `fromLines`, keeping only the entries accepted by `isValid`. */
def validFromLines(lines: Seq[String]): Seq[Entry] =
  fromLines(lines).filter(entry => isValid(entry))
110+
```
111+
112+
113+
See ftp://ftp.expasy.org/databases/enzyme/enzuser.txt
114+
115+
116+
```scala
117+
// An entry is valid unless its description starts with "Deleted entry" or "Transferred entry".
private def isValid(entry: Entry): Boolean = {
  val text = entry.description

  !text.startsWith("Deleted entry") && !text.startsWith("Transferred entry")
}
119+
120+
// Groups lines into entries, one line per step.
//
//   currentLine  the line being processed, or None once the input is exhausted
//   linesLeft    the lines after currentLine
//   entryAcc     the lines collected so far for the entry in progress
//   acc          the entries completed so far
//
// An end-of-entry line closes the entry in progress and is not part of it.
// NOTE(review): lines collected after the last end-of-entry line are discarded
// when the input runs out.
@annotation.tailrec
private def entryLinesRec(
  currentLine: Option[String],
  linesLeft: Seq[String],
  entryAcc: Seq[String],
  acc: Seq[Seq[String]]
)
: Seq[Seq[String]] =
  currentLine match {
    case None => acc
    case Some(line) =>
      // Shared by both branches; `drop(1)` is safe on an empty sequence.
      val next = linesLeft.headOption
      val rest = linesLeft.drop(1)

      // Appending goes through a Vector so that it stays cheap even when the
      // accumulators start out as a List; `toVector` on a Vector returns it as is.
      if (isEndLine(line))
        entryLinesRec(
          currentLine = next,
          linesLeft   = rest,
          entryAcc    = Vector.empty,
          acc         = acc.toVector :+ entryAcc
        )
      else
        entryLinesRec(
          currentLine = next,
          linesLeft   = rest,
          entryAcc    = entryAcc.toVector :+ line,
          acc         = acc
        )
  }
148+
149+
// Splits the lines into one group per entry, using the end-of-entry lines as separators.
// Returns no groups for empty input.
private def entryLines(lines: Seq[String]): Seq[Seq[String]] =
  entryLinesRec(
    currentLine = lines.headOption,
    // `drop(1)` instead of `tail`: `tail` throws on an empty sequence.
    linesLeft   = lines.drop(1),
    entryAcc    = Seq(),
    acc         = Seq()
  )
156+
157+
// True when the line begins with the "//" end-of-entry marker.
private def isEndLine(line: String) =
  line.take(2) == "//"
159+
}
160+
161+
```
162+
163+
164+
165+
166+
[test/scala/EnzymeEntries.scala]: ../../../test/scala/EnzymeEntries.scala.md
167+
[test/scala/EnzymeClasses.scala]: ../../../test/scala/EnzymeClasses.scala.md
168+
[main/scala/entry.scala]: ../entry.scala.md
169+
[main/scala/flat/entry.scala]: entry.scala.md
170+
[main/scala/flat/classes.scala]: classes.scala.md
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
2+
```scala
3+
package com.bio4j.data.enzyme.test
4+
5+
import org.scalatest.FunSuite
6+
7+
import com.bio4j.data.enzyme._
8+
9+
class ParseEnzymeClasses extends FunSuite {
10+
11+
// Lines of the `enzclass.txt` file, read afresh on every call.
// The file is read completely and closed before the iterator is handed out,
// so no file handle is left open.
def lines = {
  val source = io.Source.fromFile("enzclass.txt")

  try source.getLines().toList.iterator
  finally source.close()
}
13+
14+
// Every enzyme class parsed from a fresh read of the source lines.
def allEnzymeClasses = {
  val parser = flat.enzymeClasses

  parser.fromLines(lines)
}
15+
16+
test("parse all enzyme classes") {

  // Draining the iterator forces every line to be parsed; the test fails if
  // any of them throws. The parsed values themselves are not inspected.
  allEnzymeClasses.foreach { _ => () }
}
23+
```
24+
25+
26+
This is unlikely to change
27+
28+
29+
```scala
30+
test("check first classes") {

  // Expected result of parsing the first five class lines of the file.
  val expected = List[EnzymeClasses](
    EnzymeClass("1.-.-.-", "Oxidoreductases"),
    EnzymeSubClass("1.1.-.-", "Acting on the CH-OH group of donors"),
    EnzymeSubSubClass("1.1.1.-", "With NAD(+) or NADP(+) as acceptor"),
    EnzymeSubSubClass("1.1.2.-", "With a cytochrome as acceptor"),
    EnzymeSubSubClass("1.1.3.-", "With oxygen as acceptor")
  )

  val firstFive = allEnzymeClasses.take(5).toList

  assert(firstFive === expected)
}
45+
}
46+
47+
```
48+
49+
50+
51+
52+
[test/scala/EnzymeEntries.scala]: EnzymeEntries.scala.md
53+
[test/scala/EnzymeClasses.scala]: EnzymeClasses.scala.md
54+
[main/scala/entry.scala]: ../../main/scala/entry.scala.md
55+
[main/scala/flat/entry.scala]: ../../main/scala/flat/entry.scala.md
56+
[main/scala/flat/classes.scala]: ../../main/scala/flat/classes.scala.md

0 commit comments

Comments
 (0)