Switch to unified view

a b/test/resources/simpleWorkflow.t2flow
1
<workflow xmlns="http://taverna.sf.net/2008/xml/t2flow" version="1" producedBy="taverna-2.4.0"><dataflow id="673b9394-9206-4395-b9bb-1f48e52cec51" role="top"><name>Matchbox_Evaluation</name><inputPorts><port><name>bc_dirlist_file_path</name><depth>0</depth><granularDepth>0</granularDepth><annotations><annotation_chain encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
2
  <annotationAssertions>
3
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
4
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.ExampleValue">
5
        <text>C:\Users\munterberger\Desktop\mb_content.txt</text>
6
      </annotationBean>
7
      <date>2013-07-23 13:59:01.517 UTC</date>
8
      <creators />
9
      <curationEventList />
10
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
11
  </annotationAssertions>
12
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain><annotation_chain encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
13
  <annotationAssertions>
14
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
15
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.FreeTextDescription">
16
        <text>Path to textfile containing paths to barcode directories.</text>
17
      </annotationBean>
18
      <date>2012-10-01 13:31:43.327 UTC</date>
19
      <creators />
20
      <curationEventList />
21
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
22
  </annotationAssertions>
23
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain></annotations></port></inputPorts><outputPorts><port><name>results</name><annotations /></port><port><name>matches</name><annotations /></port><port><name>report</name><annotations /></port><port><name>evlog</name><annotations /></port></outputPorts><processors><processor><name>matchbox_evaluate</name><inputPorts><port><name>matchbox</name><depth>0</depth></port></inputPorts><outputPorts><port><name>cid</name><depth>0</depth><granularDepth>0</granularDepth></port><port><name>fm</name><depth>0</depth><granularDepth>0</granularDepth></port><port><name>iid</name><depth>0</depth><granularDepth>0</granularDepth></port><port><name>md</name><depth>0</depth><granularDepth>0</granularDepth></port><port><name>prec</name><depth>0</depth><granularDepth>0</granularDepth></port><port><name>rec</name><depth>0</depth><granularDepth>0</granularDepth></port><port><name>log</name><depth>0</depth><granularDepth>0</granularDepth></port></outputPorts><annotations /><activities><activity><raven><group>net.sf.taverna.t2.activities</group><artifact>beanshell-activity</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.activities.beanshell.BeanshellActivity</class><inputMap><map from="matchbox" to="matchbox" /></inputMap><outputMap><map from="rec" to="rec" /><map from="fm" to="fm" /><map from="md" to="md" /><map from="iid" to="iid" /><map from="prec" to="prec" /><map from="cid" to="cid" /><map from="log" to="log" /></outputMap><configBean encoding="xstream"><net.sf.taverna.t2.activities.beanshell.BeanshellActivityConfigurationBean xmlns="">
24
  <inputs>
25
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
26
      <name>matchbox</name>
27
      <depth>0</depth>
28
      <mimeTypes>
29
        <string>text/plain</string>
30
      </mimeTypes>
31
      <handledReferenceSchemes />
32
      <translatedElementType>java.lang.String</translatedElementType>
33
      <allowsLiteralValues>true</allowsLiteralValues>
34
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
35
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
36
      <name>groundtruth</name>
37
      <depth>0</depth>
38
      <mimeTypes>
39
        <string>text/plain</string>
40
      </mimeTypes>
41
      <handledReferenceSchemes />
42
      <translatedElementType>java.lang.String</translatedElementType>
43
      <allowsLiteralValues>true</allowsLiteralValues>
44
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
45
  </inputs>
46
  <outputs>
47
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
48
      <name>cid</name>
49
      <depth>0</depth>
50
      <mimeTypes />
51
      <granularDepth>0</granularDepth>
52
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
53
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
54
      <name>md</name>
55
      <depth>0</depth>
56
      <mimeTypes />
57
      <granularDepth>0</granularDepth>
58
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
59
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
60
      <name>iid</name>
61
      <depth>0</depth>
62
      <mimeTypes />
63
      <granularDepth>0</granularDepth>
64
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
65
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
66
      <name>prec</name>
67
      <depth>0</depth>
68
      <mimeTypes />
69
      <granularDepth>0</granularDepth>
70
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
71
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
72
      <name>rec</name>
73
      <depth>0</depth>
74
      <mimeTypes />
75
      <granularDepth>0</granularDepth>
76
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
77
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
78
      <name>fm</name>
79
      <depth>0</depth>
80
      <mimeTypes />
81
      <granularDepth>0</granularDepth>
82
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
83
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
84
      <name>log</name>
85
      <depth>0</depth>
86
      <mimeTypes />
87
      <granularDepth>0</granularDepth>
88
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
89
  </outputs>
90
  <classLoaderSharing>workflow</classLoaderSharing>
91
  <localDependencies>
92
    <string>tb-lsdr-matchboxeval-lib-1.0-SNAPSHOT.jar</string>
93
    <string>combinatoricslib-0.2.jar</string>
94
  </localDependencies>
95
  <artifactDependencies />
96
  <script>import eu.scape_project.tb.lsdr.MatchboxEval;
97
98
MatchboxEval me = new MatchboxEval(matchbox,groundtruth);
99
100
me.evaluate();
101
102
cid = me.getTruePositives();
103
iid = me.getFalsePositives();
104
md = me.getFalseNegatives();
105
prec = me.getPrecision();
106
rec = me.getRecall();
107
fm = me.getFmeasure();
108
109
log = me.getLog();</script>
110
  <dependencies />
111
</net.sf.taverna.t2.activities.beanshell.BeanshellActivityConfigurationBean></configBean><annotations /></activity></activities><dispatchStack><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Parallelize</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig xmlns="">
112
  <maxJobs>1</maxJobs>
113
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ErrorBounce</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Failover</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Retry</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig xmlns="">
114
  <backoffFactor>1.0</backoffFactor>
115
  <initialDelay>1000</initialDelay>
116
  <maxDelay>5000</maxDelay>
117
  <maxRetries>0</maxRetries>
118
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Invoke</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer></dispatchStack><iterationStrategyStack><iteration><strategy><dot><port name="matchbox" depth="0" /></dot></strategy></iteration></iterationStrategyStack></processor><processor><name>matchbox</name><inputPorts><port><name>barcode_path</name><depth>0</depth></port></inputPorts><outputPorts><port><name>STDOUT</name><depth>0</depth><granularDepth>0</granularDepth></port></outputPorts><annotations /><activities><activity><raven><group>net.sf.taverna.t2.activities</group><artifact>external-tool-activity</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.activities.externaltool.ExternalToolActivity</class><inputMap><map from="barcode_path" to="barcode_path" /></inputMap><outputMap><map from="STDOUT" to="STDOUT" /></outputMap><configBean encoding="xstream"><net.sf.taverna.t2.activities.externaltool.ExternalToolActivityConfigurationBean xmlns="">
119
  <mechanismType>D0A4CDEB-DD10-4A8E-A49C-8871003083D8</mechanismType>
120
  <mechanismName>fuel</mechanismName>
121
  <mechanismXML>&lt;?xml version="1.0" encoding="UTF-8"?&gt;&#xD;
122
&lt;sshInvocation&gt;&lt;sshNode&gt;&lt;host&gt;fue.onb.ac.at&lt;/host&gt;&lt;port&gt;22&lt;/port&gt;&lt;directory&gt;/tmp/&lt;/directory&gt;&lt;linkCommand&gt;/bin/ln -s %%PATH_TO_ORIGINAL%% %%TARGET_NAME%%&lt;/linkCommand&gt;&lt;copyCommand&gt;/bin/cp %%PATH_TO_ORIGINAL%% %%TARGET_NAME%%&lt;/copyCommand&gt;&lt;/sshNode&gt;&lt;/sshInvocation&gt;&#xD;
123
</mechanismXML>
124
  <externaltoolid>eb15da53-92d0-4dac-aa40-3101b4500997</externaltoolid>
125
  <useCaseDescription>
126
    <usecaseid />
127
    <description />
128
    <command>RANDOM=`tr -dc "[:alpha:]" &lt; /dev/urandom | head -c 20`
129
PATHID=matchbox
130
TMPDIR=/tmp/${PATHID}${RANDOM}
131
mkdir ${TMPDIR}
132
cd /opt/scape/darling/
133
/usr/bin/python ./FindDuplicates.py %%barcode_path%% --featdir ${TMPDIR} all</command>
134
    <preparingTimeoutInSeconds>1200</preparingTimeoutInSeconds>
135
    <executionTimeoutInSeconds>1800</executionTimeoutInSeconds>
136
    <tags>
137
      <string>barcode_path</string>
138
    </tags>
139
    <REs />
140
    <queue__preferred />
141
    <queue__deny />
142
    <static__inputs />
143
    <inputs>
144
      <entry>
145
        <string>barcode_path</string>
146
        <de.uni__luebeck.inb.knowarc.usecases.ScriptInputUser>
147
          <tag>barcode_path</tag>
148
          <file>false</file>
149
          <tempFile>false</tempFile>
150
          <binary>false</binary>
151
          <charsetName>UTF-8</charsetName>
152
          <forceCopy>false</forceCopy>
153
          <list>false</list>
154
          <concatenate>false</concatenate>
155
          <mime />
156
        </de.uni__luebeck.inb.knowarc.usecases.ScriptInputUser>
157
      </entry>
158
    </inputs>
159
    <outputs />
160
    <includeStdIn>false</includeStdIn>
161
    <includeStdOut>true</includeStdOut>
162
    <includeStdErr>true</includeStdErr>
163
    <validReturnCodes>
164
      <int>0</int>
165
    </validReturnCodes>
166
  </useCaseDescription>
167
  <edited>false</edited>
168
</net.sf.taverna.t2.activities.externaltool.ExternalToolActivityConfigurationBean></configBean><annotations /></activity></activities><dispatchStack><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Parallelize</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig xmlns="">
169
  <maxJobs>4</maxJobs>
170
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ErrorBounce</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Failover</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Retry</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig xmlns="">
171
  <backoffFactor>1.0</backoffFactor>
172
  <initialDelay>1000</initialDelay>
173
  <maxDelay>5000</maxDelay>
174
  <maxRetries>0</maxRetries>
175
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Invoke</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer></dispatchStack><iterationStrategyStack><iteration><strategy><cross><port name="barcode_path" depth="0" /></cross></strategy></iteration></iterationStrategyStack></processor><processor><name>read_bc_dirlist_file</name><inputPorts><port><name>fileurl</name><depth>0</depth></port></inputPorts><outputPorts><port><name>filecontents</name><depth>0</depth><granularDepth>0</granularDepth></port></outputPorts><annotations /><activities><activity><raven><group>net.sf.taverna.t2.activities</group><artifact>localworker-activity</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.activities.localworker.LocalworkerActivity</class><inputMap><map from="fileurl" to="fileurl" /></inputMap><outputMap><map from="filecontents" to="filecontents" /></outputMap><configBean encoding="xstream"><net.sf.taverna.t2.activities.localworker.LocalworkerActivityConfigurationBean xmlns="">
176
  <inputs>
177
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
178
      <name>fileurl</name>
179
      <depth>0</depth>
180
      <mimeTypes>
181
        <string>'text/plain'</string>
182
      </mimeTypes>
183
      <handledReferenceSchemes />
184
      <translatedElementType>java.lang.String</translatedElementType>
185
      <allowsLiteralValues>true</allowsLiteralValues>
186
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
187
  </inputs>
188
  <outputs>
189
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
190
      <name>filecontents</name>
191
      <depth>0</depth>
192
      <mimeTypes>
193
        <string>'text/plain'</string>
194
      </mimeTypes>
195
      <granularDepth>0</granularDepth>
196
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
197
  </outputs>
198
  <classLoaderSharing>workflow</classLoaderSharing>
199
  <localDependencies />
200
  <artifactDependencies />
201
  <script>BufferedReader getReader (String fileUrl) throws IOException {
202
      InputStreamReader reader;
203
      try {
204
          reader = new FileReader(fileUrl);
205
      }
206
      catch (FileNotFoundException e) {
207
          // try a real URL instead
208
          URL url = new URL(fileUrl);
209
          reader = new InputStreamReader (url.openStream());
210
      }
211
      return new BufferedReader(reader);
212
  }
213
214
215
216
StringBuffer sb = new StringBuffer(4000);
217
218
BufferedReader in = getReader(fileurl);
219
String str;
220
String lineEnding = System.getProperty("line.separator");
221
222
while ((str = in.readLine()) != null) {
223
  sb.append(str);
224
  sb.append(lineEnding);
225
}
226
in.close();
227
filecontents = sb.toString();
228
  
229
</script>
230
  <dependencies />
231
  <localworkerName>net.sourceforge.taverna.scuflworkers.io.TextFileReader</localworkerName>
232
</net.sf.taverna.t2.activities.localworker.LocalworkerActivityConfigurationBean></configBean><annotations /></activity></activities><dispatchStack><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Parallelize</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig xmlns="">
233
  <maxJobs>1</maxJobs>
234
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ErrorBounce</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Failover</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Retry</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig xmlns="">
235
  <backoffFactor>1.0</backoffFactor>
236
  <initialDelay>1000</initialDelay>
237
  <maxDelay>5000</maxDelay>
238
  <maxRetries>0</maxRetries>
239
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Invoke</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer></dispatchStack><iterationStrategyStack><iteration><strategy><cross><port name="fileurl" depth="0" /></cross></strategy></iteration></iterationStrategyStack></processor><processor><name>split_bc_dirlist</name><inputPorts><port><name>regex</name><depth>0</depth></port><port><name>string</name><depth>0</depth></port></inputPorts><outputPorts><port><name>split</name><depth>1</depth><granularDepth>1</granularDepth></port></outputPorts><annotations /><activities><activity><raven><group>net.sf.taverna.t2.activities</group><artifact>localworker-activity</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.activities.localworker.LocalworkerActivity</class><inputMap><map from="regex" to="regex" /><map from="string" to="string" /></inputMap><outputMap><map from="split" to="split" /></outputMap><configBean encoding="xstream"><net.sf.taverna.t2.activities.localworker.LocalworkerActivityConfigurationBean xmlns="">
240
  <inputs>
241
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
242
      <name>string</name>
243
      <depth>0</depth>
244
      <mimeTypes>
245
        <string>'text/plain'</string>
246
      </mimeTypes>
247
      <handledReferenceSchemes />
248
      <translatedElementType>java.lang.String</translatedElementType>
249
      <allowsLiteralValues>true</allowsLiteralValues>
250
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
251
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
252
      <name>regex</name>
253
      <depth>0</depth>
254
      <mimeTypes>
255
        <string>'text/plain'</string>
256
      </mimeTypes>
257
      <handledReferenceSchemes />
258
      <translatedElementType>java.lang.String</translatedElementType>
259
      <allowsLiteralValues>true</allowsLiteralValues>
260
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
261
  </inputs>
262
  <outputs>
263
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
264
      <name>split</name>
265
      <depth>1</depth>
266
      <mimeTypes>
267
        <string>l('text/plain')</string>
268
      </mimeTypes>
269
      <granularDepth>1</granularDepth>
270
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
271
  </outputs>
272
  <classLoaderSharing>workflow</classLoaderSharing>
273
  <localDependencies />
274
  <artifactDependencies />
275
  <script>List split = new ArrayList();
276
if (!string.equals("")) {
277
  String regexString = ",";
278
  if (regex != void) {
279
      regexString = regex;
280
  }
281
  String[] result = string.split(regexString);
282
  for (int i = 0; i &lt; result.length; i++) {
283
      split.add(result[i]);
284
  }
285
}
286
</script>
287
  <dependencies />
288
  <localworkerName>org.embl.ebi.escience.scuflworkers.java.SplitByRegex</localworkerName>
289
</net.sf.taverna.t2.activities.localworker.LocalworkerActivityConfigurationBean></configBean><annotations /></activity></activities><dispatchStack><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Parallelize</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig xmlns="">
290
  <maxJobs>1</maxJobs>
291
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ErrorBounce</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Failover</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Retry</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig xmlns="">
292
  <backoffFactor>1.0</backoffFactor>
293
  <initialDelay>1000</initialDelay>
294
  <maxDelay>5000</maxDelay>
295
  <maxRetries>0</maxRetries>
296
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Invoke</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer></dispatchStack><iterationStrategyStack><iteration><strategy><cross><port name="regex" depth="0" /><port name="string" depth="0" /></cross></strategy></iteration></iterationStrategyStack></processor><processor><name>newline</name><inputPorts /><outputPorts><port><name>value</name><depth>0</depth><granularDepth>0</granularDepth></port></outputPorts><annotations /><activities><activity><raven><group>net.sf.taverna.t2.activities</group><artifact>stringconstant-activity</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.activities.stringconstant.StringConstantActivity</class><inputMap /><outputMap><map from="value" to="value" /></outputMap><configBean encoding="xstream"><net.sf.taverna.t2.activities.stringconstant.StringConstantConfigurationBean xmlns="">
297
  <value>\n</value>
298
</net.sf.taverna.t2.activities.stringconstant.StringConstantConfigurationBean></configBean><annotations /></activity></activities><dispatchStack><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Parallelize</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig xmlns="">
299
  <maxJobs>1</maxJobs>
300
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ErrorBounce</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Failover</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Retry</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig xmlns="">
301
  <backoffFactor>1.0</backoffFactor>
302
  <initialDelay>1000</initialDelay>
303
  <maxDelay>5000</maxDelay>
304
  <maxRetries>0</maxRetries>
305
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Invoke</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer></dispatchStack><iterationStrategyStack><iteration><strategy /></iteration></iterationStrategyStack></processor><processor><name>parse_matchbox_stdout</name><inputPorts><port><name>matchbox_stdout</name><depth>0</depth></port><port><name>barcode_path</name><depth>0</depth></port></inputPorts><outputPorts><port><name>duplicates_result</name><depth>0</depth><granularDepth>0</granularDepth></port><port><name>duplicates_matches</name><depth>0</depth><granularDepth>0</granularDepth></port></outputPorts><annotations /><activities><activity><raven><group>net.sf.taverna.t2.activities</group><artifact>beanshell-activity</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.activities.beanshell.BeanshellActivity</class><inputMap><map from="barcode_path" to="barcode_path" /><map from="matchbox_stdout" to="matchbox_stdout" /></inputMap><outputMap><map from="duplicates_result" to="duplicates_result" /><map from="duplicates_matches" to="duplicates_matches" /></outputMap><configBean encoding="xstream"><net.sf.taverna.t2.activities.beanshell.BeanshellActivityConfigurationBean xmlns="">
306
  <inputs>
307
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
308
      <name>barcode_path</name>
309
      <depth>0</depth>
310
      <mimeTypes>
311
        <string>text/plain</string>
312
      </mimeTypes>
313
      <handledReferenceSchemes />
314
      <translatedElementType>java.lang.String</translatedElementType>
315
      <allowsLiteralValues>true</allowsLiteralValues>
316
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
317
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
318
      <name>matchbox_stdout</name>
319
      <depth>0</depth>
320
      <mimeTypes>
321
        <string>text/plain</string>
322
      </mimeTypes>
323
      <handledReferenceSchemes />
324
      <translatedElementType>java.lang.String</translatedElementType>
325
      <allowsLiteralValues>true</allowsLiteralValues>
326
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
327
  </inputs>
328
  <outputs>
329
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
330
      <name>duplicates_result</name>
331
      <depth>0</depth>
332
      <mimeTypes />
333
      <granularDepth>0</granularDepth>
334
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
335
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
336
      <name>duplicates_matches</name>
337
      <depth>0</depth>
338
      <mimeTypes />
339
      <granularDepth>0</granularDepth>
340
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
341
  </outputs>
342
  <classLoaderSharing>workflow</classLoaderSharing>
343
  <localDependencies />
344
  <artifactDependencies />
345
  <script>#!Pairtree pt = new Pairtree();
346
#!String id = pt.mapToId("/mnt/abonas/linktree/", barcode_path.substring(0,barcode_path.lastIndexOf("/")));
347
String duplicates_result = "";
348
String duplicates_matches = "";
349
duplicates_result +=  barcode_path+ ":\n";
350
duplicates_matches +=  barcode_path + "\t";
351
StringTokenizer st = new StringTokenizer(matchbox_stdout, "\n");
352
boolean startDuplicates = false;
353
boolean hasDuplicates = false;
354
while (st.hasMoreTokens()) {
355
    String token = st.nextToken();
356
    if (startDuplicates) {
357
        if (token.contains("=&gt;")) {
358
            duplicates_result += token + "\n";
359
            hasDuplicates = true;
360
        }
361
362
    }
363
    if (token.contains("=== List of detected duplicates ===")) {
364
        startDuplicates = true;
365
    }
366
}
367
if(hasDuplicates)
368
    duplicates_matches += "1";
369
else
370
    duplicates_matches += "0";</script>
371
  <dependencies />
372
</net.sf.taverna.t2.activities.beanshell.BeanshellActivityConfigurationBean></configBean><annotations /></activity></activities><dispatchStack><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Parallelize</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig xmlns="">
373
  <maxJobs>1</maxJobs>
374
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ErrorBounce</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Failover</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Retry</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig xmlns="">
375
  <backoffFactor>1.0</backoffFactor>
376
  <initialDelay>1000</initialDelay>
377
  <maxDelay>5000</maxDelay>
378
  <maxRetries>0</maxRetries>
379
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Invoke</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer></dispatchStack><iterationStrategyStack><iteration><strategy><dot><port name="matchbox_stdout" depth="0" /><port name="barcode_path" depth="0" /></dot></strategy></iteration></iterationStrategyStack></processor><processor><name>report</name><inputPorts><port><name>cid</name><depth>0</depth></port><port><name>fm</name><depth>0</depth></port><port><name>iid</name><depth>0</depth></port><port><name>md</name><depth>0</depth></port><port><name>prec</name><depth>0</depth></port><port><name>rec</name><depth>0</depth></port></inputPorts><outputPorts><port><name>report</name><depth>0</depth><granularDepth>0</granularDepth></port></outputPorts><annotations /><activities><activity><raven><group>net.sf.taverna.t2.activities</group><artifact>beanshell-activity</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.activities.beanshell.BeanshellActivity</class><inputMap><map from="rec" to="rec" /><map from="fm" to="fm" /><map from="md" to="md" /><map from="iid" to="iid" /><map from="prec" to="prec" /><map from="cid" to="cid" /></inputMap><outputMap><map from="report" to="report" /></outputMap><configBean encoding="xstream"><net.sf.taverna.t2.activities.beanshell.BeanshellActivityConfigurationBean xmlns="">
380
  <inputs>
381
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
382
      <name>prec</name>
383
      <depth>0</depth>
384
      <mimeTypes>
385
        <string>text/plain</string>
386
      </mimeTypes>
387
      <handledReferenceSchemes />
388
      <translatedElementType>java.lang.String</translatedElementType>
389
      <allowsLiteralValues>true</allowsLiteralValues>
390
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
391
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
392
      <name>md</name>
393
      <depth>0</depth>
394
      <mimeTypes>
395
        <string>text/plain</string>
396
      </mimeTypes>
397
      <handledReferenceSchemes />
398
      <translatedElementType>java.lang.String</translatedElementType>
399
      <allowsLiteralValues>true</allowsLiteralValues>
400
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
401
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
402
      <name>iid</name>
403
      <depth>0</depth>
404
      <mimeTypes>
405
        <string>text/plain</string>
406
      </mimeTypes>
407
      <handledReferenceSchemes />
408
      <translatedElementType>java.lang.String</translatedElementType>
409
      <allowsLiteralValues>true</allowsLiteralValues>
410
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
411
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
412
      <name>fm</name>
413
      <depth>0</depth>
414
      <mimeTypes>
415
        <string>text/plain</string>
416
      </mimeTypes>
417
      <handledReferenceSchemes />
418
      <translatedElementType>java.lang.String</translatedElementType>
419
      <allowsLiteralValues>true</allowsLiteralValues>
420
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
421
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
422
      <name>cid</name>
423
      <depth>0</depth>
424
      <mimeTypes>
425
        <string>text/plain</string>
426
      </mimeTypes>
427
      <handledReferenceSchemes />
428
      <translatedElementType>java.lang.String</translatedElementType>
429
      <allowsLiteralValues>true</allowsLiteralValues>
430
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
431
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
432
      <name>rec</name>
433
      <depth>0</depth>
434
      <mimeTypes>
435
        <string>text/plain</string>
436
      </mimeTypes>
437
      <handledReferenceSchemes />
438
      <translatedElementType>java.lang.String</translatedElementType>
439
      <allowsLiteralValues>true</allowsLiteralValues>
440
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
441
  </inputs>
442
  <outputs>
443
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
444
      <name>report</name>
445
      <depth>0</depth>
446
      <mimeTypes />
447
      <granularDepth>0</granularDepth>
448
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
449
  </outputs>
450
  <classLoaderSharing>workflow</classLoaderSharing>
451
  <localDependencies />
452
  <artifactDependencies />
453
  <!-- Beanshell script for the "report" processor: assigns the output variable "report" a plain-text summary with one labelled line each for the cid, iid, md, prec, rec and fm input values; a percent sign is appended after prec, rec and fm. -->
  <script>report = "Correctly identified duplicates (true positives): "+cid+"\n"+
454
"Incorrectly identified duplicates (false positives): "+iid+"\n"+
455
"Missed duplicates (false negatives): "+md+"\n"+
456
"Precision: "+prec+"%\n"+
457
"Recall: "+rec+"%\n"+
458
"F-Measure: "+fm+"%\n";</script>
459
  <dependencies />
460
</net.sf.taverna.t2.activities.beanshell.BeanshellActivityConfigurationBean></configBean><annotations /></activity></activities><dispatchStack><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Parallelize</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig xmlns="">
461
  <maxJobs>1</maxJobs>
462
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ErrorBounce</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Failover</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Retry</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig xmlns="">
463
  <backoffFactor>1.0</backoffFactor>
464
  <initialDelay>1000</initialDelay>
465
  <maxDelay>5000</maxDelay>
466
  <maxRetries>0</maxRetries>
467
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Invoke</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer></dispatchStack><iterationStrategyStack><iteration><strategy><dot><port name="cid" depth="0" /><port name="fm" depth="0" /><port name="iid" depth="0" /><port name="md" depth="0" /><port name="prec" depth="0" /><port name="rec" depth="0" /></dot></strategy></iteration></iterationStrategyStack></processor></processors><conditions /><datalinks><datalink><sink type="processor"><processor>matchbox_evaluate</processor><port>matchbox</port></sink><source type="processor"><processor>parse_matchbox_stdout</processor><port>duplicates_result</port></source></datalink><datalink><sink type="processor"><processor>matchbox</processor><port>barcode_path</port></sink><source type="processor"><processor>split_bc_dirlist</processor><port>split</port></source></datalink><datalink><sink type="processor"><processor>read_bc_dirlist_file</processor><port>fileurl</port></sink><source type="dataflow"><port>bc_dirlist_file_path</port></source></datalink><datalink><sink type="processor"><processor>split_bc_dirlist</processor><port>regex</port></sink><source type="processor"><processor>newline</processor><port>value</port></source></datalink><datalink><sink type="processor"><processor>split_bc_dirlist</processor><port>string</port></sink><source type="processor"><processor>read_bc_dirlist_file</processor><port>filecontents</port></source></datalink><datalink><sink type="processor"><processor>parse_matchbox_stdout</processor><port>matchbox_stdout</port></sink><source type="processor"><processor>matchbox</processor><port>STDOUT</port></source></datalink><datalink><sink 
type="processor"><processor>parse_matchbox_stdout</processor><port>barcode_path</port></sink><source type="processor"><processor>split_bc_dirlist</processor><port>split</port></source></datalink><datalink><sink type="processor"><processor>report</processor><port>cid</port></sink><source type="processor"><processor>matchbox_evaluate</processor><port>cid</port></source></datalink><datalink><sink type="processor"><processor>report</processor><port>fm</port></sink><source type="processor"><processor>matchbox_evaluate</processor><port>fm</port></source></datalink><datalink><sink type="processor"><processor>report</processor><port>iid</port></sink><source type="processor"><processor>matchbox_evaluate</processor><port>iid</port></source></datalink><datalink><sink type="processor"><processor>report</processor><port>md</port></sink><source type="processor"><processor>matchbox_evaluate</processor><port>md</port></source></datalink><datalink><sink type="processor"><processor>report</processor><port>prec</port></sink><source type="processor"><processor>matchbox_evaluate</processor><port>prec</port></source></datalink><datalink><sink type="processor"><processor>report</processor><port>rec</port></sink><source type="processor"><processor>matchbox_evaluate</processor><port>rec</port></source></datalink><datalink><sink type="dataflow"><port>results</port></sink><source type="processor"><processor>parse_matchbox_stdout</processor><port>duplicates_result</port></source></datalink><datalink><sink type="dataflow"><port>matches</port></sink><source type="processor"><processor>parse_matchbox_stdout</processor><port>duplicates_matches</port></source></datalink><datalink><sink type="dataflow"><port>report</port></sink><source type="processor"><processor>report</processor><port>report</port></source></datalink><datalink><sink type="dataflow"><port>evlog</port></sink><source 
type="processor"><processor>matchbox_evaluate</processor><port>log</port></source></datalink></datalinks><annotations><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
468
  <annotationAssertions>
469
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
470
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
471
        <identification>2efd4378-62f5-41ea-bf6f-dd7d50cd732e</identification>
472
      </annotationBean>
473
      <date>2012-10-01 13:08:54.255 UTC</date>
474
      <creators />
475
      <curationEventList />
476
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
477
  </annotationAssertions>
478
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
479
  <annotationAssertions>
480
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
481
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
482
        <identification>6d1c206f-f442-49d3-97b3-50cd4ac66b5f</identification>
483
      </annotationBean>
484
      <date>2012-09-26 15:20:31.846 UTC</date>
485
      <creators />
486
      <curationEventList />
487
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
488
  </annotationAssertions>
489
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
490
  <annotationAssertions>
491
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
492
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
493
        <identification>801043a1-195a-4b7d-8dd4-c1ebfa8d3b41</identification>
494
      </annotationBean>
495
      <date>2012-10-01 12:37:43.3 UTC</date>
496
      <creators />
497
      <curationEventList />
498
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
499
  </annotationAssertions>
500
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
501
  <annotationAssertions>
502
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
503
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
504
        <identification>81553fe0-9aa5-4fc7-a775-19e96144f650</identification>
505
      </annotationBean>
506
      <date>2012-10-02 09:38:25.821 UTC</date>
507
      <creators />
508
      <curationEventList />
509
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
510
  </annotationAssertions>
511
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
512
  <annotationAssertions>
513
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
514
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
515
        <identification>673b9394-9206-4395-b9bb-1f48e52cec51</identification>
516
      </annotationBean>
517
      <date>2014-04-28 09:04:23.655 UTC</date>
518
      <creators />
519
      <curationEventList />
520
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
521
  </annotationAssertions>
522
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
523
  <annotationAssertions>
524
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
525
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
526
        <identification>506fc52e-7052-4f57-8c15-83821665c03e</identification>
527
      </annotationBean>
528
      <date>2012-10-02 09:56:29.495 UTC</date>
529
      <creators />
530
      <curationEventList />
531
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
532
  </annotationAssertions>
533
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
534
  <annotationAssertions>
535
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
536
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.FreeTextDescription">
537
        <text>Matchbox evaluation against ground truth. The evaluation process first
538
creates the matchbox output and ground truth lists. It then counts each page
539
tuple from the matchbox output that is in the ground truth as a correctly
540
identified tuple (true positive). Those that are not in the ground truth are
541
counted as incorrectly identified tuples (false positives), and finally,
542
those that are in the ground truth but not in the matchbox output are counted
543
as missed tuples (false negatives).
544
The precision is then calculated as the number of true positives (i.e. the 
545
number of items correctly labeled as duplicate page pairs) divided by the 
546
total number of elements assumed to be duplicate page pairs (i.e. the sum of 
547
true positives and false positives, which are items incorrectly labeled as 
548
being duplicate page pairs). Recall is then defined as the number of 
549
true positives divided by the total number of elements of duplicate page 
550
pairs (i.e. the sum of true positives and false negatives, which are items 
551
that have not been labeled as being duplicate page pairs but actually should have 
552
been).
553
The ground truth contains single page instances without duplicates and 
554
n-tuples (duplicates, triples, quadruples, etc.). n-tuples with n&gt;2 are 
555
expanded, the result is a list of 2-tuples which is used to determine the
556
number of missed duplicates (false negatives).</text>
557
      </annotationBean>
558
      <date>2012-10-02 12:36:08.934 UTC</date>
559
      <creators />
560
      <curationEventList />
561
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
562
  </annotationAssertions>
563
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
564
  <annotationAssertions>
565
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
566
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
567
        <identification>e26a0bd8-eb71-4fd3-9bc6-238d71792ed3</identification>
568
      </annotationBean>
569
      <date>2012-10-01 12:46:11.91 UTC</date>
570
      <creators />
571
      <curationEventList />
572
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
573
  </annotationAssertions>
574
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
575
  <annotationAssertions>
576
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
577
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
578
        <identification>857c050a-49b5-4606-bcdb-220455d64291</identification>
579
      </annotationBean>
580
      <date>2012-10-01 12:33:52.730 UTC</date>
581
      <creators />
582
      <curationEventList />
583
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
584
  </annotationAssertions>
585
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
586
  <annotationAssertions>
587
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
588
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
589
        <identification>f85240d9-2108-4057-8c42-78eef7ecf441</identification>
590
      </annotationBean>
591
      <date>2012-10-01 12:27:16.65 UTC</date>
592
      <creators />
593
      <curationEventList />
594
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
595
  </annotationAssertions>
596
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
597
  <annotationAssertions>
598
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
599
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
600
        <identification>79894ee6-b109-41b7-95db-8178b7eb4121</identification>
601
      </annotationBean>
602
      <date>2012-10-02 09:56:58.139 UTC</date>
603
      <creators />
604
      <curationEventList />
605
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
606
  </annotationAssertions>
607
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
608
  <annotationAssertions>
609
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
610
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
611
        <identification>66473d1e-e539-497e-874b-28f42198a8f3</identification>
612
      </annotationBean>
613
      <date>2012-09-26 15:18:43.167 UTC</date>
614
      <creators />
615
      <curationEventList />
616
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
617
  </annotationAssertions>
618
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
619
  <annotationAssertions>
620
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
621
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
622
        <identification>eaa27b27-7bc6-4461-875e-499f30946c6a</identification>
623
      </annotationBean>
624
      <date>2013-07-23 13:59:10.99 UTC</date>
625
      <creators />
626
      <curationEventList />
627
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
628
  </annotationAssertions>
629
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
630
  <annotationAssertions>
631
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
632
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
633
        <identification>8538abb1-b420-4d8e-be5f-64da636f59c1</identification>
634
      </annotationBean>
635
      <date>2012-10-02 09:37:23.56 UTC</date>
636
      <creators />
637
      <curationEventList />
638
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
639
  </annotationAssertions>
640
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
641
  <annotationAssertions>
642
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
643
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
644
        <identification>24c30e8d-a759-497e-ae9a-4c2c6cfb7bcf</identification>
645
      </annotationBean>
646
      <date>2012-10-01 12:40:56.91 UTC</date>
647
      <creators />
648
      <curationEventList />
649
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
650
  </annotationAssertions>
651
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
652
  <annotationAssertions>
653
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
654
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
655
        <identification>696a6a59-a428-4203-9160-0867acbb4f3a</identification>
656
      </annotationBean>
657
      <date>2012-10-01 13:32:02.56 UTC</date>
658
      <creators />
659
      <curationEventList />
660
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
661
  </annotationAssertions>
662
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
663
  <annotationAssertions>
664
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
665
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
666
        <identification>32d73c43-4cf5-4abe-a452-816bd12dc063</identification>
667
      </annotationBean>
668
      <date>2012-10-01 12:56:53.971 UTC</date>
669
      <creators />
670
      <curationEventList />
671
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
672
  </annotationAssertions>
673
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
674
  <annotationAssertions>
675
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
676
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
677
        <identification>9ac568e3-fef8-4bb5-b035-f3680e35e284</identification>
678
      </annotationBean>
679
      <date>2012-10-02 12:28:00.33 UTC</date>
680
      <creators />
681
      <curationEventList />
682
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
683
  </annotationAssertions>
684
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
685
  <annotationAssertions>
686
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
687
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
688
        <identification>2a9e3405-7051-4feb-ba82-9ec74af23557</identification>
689
      </annotationBean>
690
      <date>2012-10-01 12:38:49.60 UTC</date>
691
      <creators />
692
      <curationEventList />
693
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
694
  </annotationAssertions>
695
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
696
  <annotationAssertions>
697
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
698
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
699
        <identification>f24d982c-72c1-4654-b2b3-378a99dc1287</identification>
700
      </annotationBean>
701
      <date>2013-07-24 07:41:18.881 UTC</date>
702
      <creators />
703
      <curationEventList />
704
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
705
  </annotationAssertions>
706
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
707
  <annotationAssertions>
708
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
709
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.DescriptiveTitle">
710
        <text>Matchbox Evaluation</text>
711
      </annotationBean>
712
      <date>2012-10-02 09:38:19.704 UTC</date>
713
      <creators />
714
      <curationEventList />
715
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
716
  </annotationAssertions>
717
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
718
  <annotationAssertions>
719
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
720
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
721
        <identification>0ce1f37b-c130-4473-996b-2e88763b69fc</identification>
722
      </annotationBean>
723
      <date>2012-10-01 13:09:53.436 UTC</date>
724
      <creators />
725
      <curationEventList />
726
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
727
  </annotationAssertions>
728
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
729
  <annotationAssertions>
730
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
731
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
732
        <identification>6fa652e4-4cd9-462e-adee-7698bd80de14</identification>
733
      </annotationBean>
734
      <date>2012-10-01 12:33:37.287 UTC</date>
735
      <creators />
736
      <curationEventList />
737
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
738
  </annotationAssertions>
739
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
740
  <annotationAssertions>
741
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
742
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.Author">
743
        <text>Sven Schlarb</text>
744
      </annotationBean>
745
      <date>2012-10-02 12:36:16.119 UTC</date>
746
      <creators />
747
      <curationEventList />
748
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
749
  </annotationAssertions>
750
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
751
  <annotationAssertions>
752
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
753
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
754
        <identification>c16b4b3d-a30d-44fb-a6db-470e4b08c1b8</identification>
755
      </annotationBean>
756
      <date>2012-09-26 15:09:52.547 UTC</date>
757
      <creators />
758
      <curationEventList />
759
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
760
  </annotationAssertions>
761
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
762
  <annotationAssertions>
763
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
764
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
765
        <identification>72783ac8-1be8-47c7-834e-90b211fd678d</identification>
766
      </annotationBean>
767
      <date>2012-10-02 09:41:25.373 UTC</date>
768
      <creators />
769
      <curationEventList />
770
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
771
  </annotationAssertions>
772
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
773
  <annotationAssertions>
774
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
775
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
776
        <identification>318bf84e-aeac-4cf5-8fa9-1f18fc42a308</identification>
777
      </annotationBean>
778
      <date>2012-10-01 12:36:43.490 UTC</date>
779
      <creators />
780
      <curationEventList />
781
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
782
  </annotationAssertions>
783
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
784
  <annotationAssertions>
785
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
786
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
787
        <identification>5b2b5686-2a2c-4b25-a525-3d5c49ce8e48</identification>
788
      </annotationBean>
789
      <date>2012-10-02 11:32:35.237 UTC</date>
790
      <creators />
791
      <curationEventList />
792
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
793
  </annotationAssertions>
794
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
795
  <annotationAssertions>
796
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
797
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
798
        <identification>29e62dd9-20e7-4851-abc5-17d0dbc3d805</identification>
799
      </annotationBean>
800
      <date>2012-10-02 12:36:17.294 UTC</date>
801
      <creators />
802
      <curationEventList />
803
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
804
  </annotationAssertions>
805
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
806
  <annotationAssertions>
807
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
808
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
809
        <identification>a0ac15b5-549d-48f0-8d8d-dafb284f2f7b</identification>
810
      </annotationBean>
811
      <date>2012-10-01 13:28:45.414 UTC</date>
812
      <creators />
813
      <curationEventList />
814
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
815
  </annotationAssertions>
816
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2></annotations></dataflow></workflow>