<!-- test/resources/matchbox.t2flow : Taverna 2 workflow "Matchbox_Evaluation" -->
<workflow xmlns="http://taverna.sf.net/2008/xml/t2flow" version="1" producedBy="taverna-2.4.0"><dataflow id="f24d982c-72c1-4654-b2b3-378a99dc1287" role="top"><name>Matchbox_Evaluation</name><inputPorts><port><name>gt_filelist_path</name><depth>0</depth><granularDepth>0</granularDepth><annotations><annotation_chain encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
2
  <annotationAssertions>
3
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
4
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.ExampleValue">
5
        <text>C:\Users\munterberger\Desktop\gt_content.txt</text>
6
      </annotationBean>
7
      <date>2013-07-23 13:59:09.957 UTC</date>
8
      <creators />
9
      <curationEventList />
10
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
11
  </annotationAssertions>
12
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain><annotation_chain encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
13
  <annotationAssertions>
14
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
15
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.FreeTextDescription">
16
        <text>Path to textfile containing paths to groundtruth files.</text>
17
      </annotationBean>
18
      <date>2012-10-01 13:31:24.101 UTC</date>
19
      <creators />
20
      <curationEventList />
21
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
22
  </annotationAssertions>
23
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain></annotations></port><port><name>bc_dirlist_file_path</name><depth>0</depth><granularDepth>0</granularDepth><annotations><annotation_chain encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
24
  <annotationAssertions>
25
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
26
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.ExampleValue">
27
        <text>C:\Users\munterberger\Desktop\mb_content.txt</text>
28
      </annotationBean>
29
      <date>2013-07-23 13:59:01.517 UTC</date>
30
      <creators />
31
      <curationEventList />
32
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
33
  </annotationAssertions>
34
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain><annotation_chain encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
35
  <annotationAssertions>
36
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
37
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.FreeTextDescription">
38
        <text>Path to textfile containing paths to barcode directories.</text>
39
      </annotationBean>
40
      <date>2012-10-01 13:31:43.327 UTC</date>
41
      <creators />
42
      <curationEventList />
43
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
44
  </annotationAssertions>
45
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain></annotations></port></inputPorts><outputPorts><port><name>results</name><annotations /></port><port><name>stderr</name><annotations /></port><port><name>stdout</name><annotations /></port><port><name>matches</name><annotations /></port><port><name>report</name><annotations /></port><port><name>evlog</name><annotations /></port></outputPorts><processors><processor><name>matchbox_evaluate</name><inputPorts><port><name>groundtruth</name><depth>0</depth></port><port><name>matchbox</name><depth>0</depth></port></inputPorts><outputPorts><port><name>cid</name><depth>0</depth><granularDepth>0</granularDepth></port><port><name>fm</name><depth>0</depth><granularDepth>0</granularDepth></port><port><name>iid</name><depth>0</depth><granularDepth>0</granularDepth></port><port><name>md</name><depth>0</depth><granularDepth>0</granularDepth></port><port><name>prec</name><depth>0</depth><granularDepth>0</granularDepth></port><port><name>rec</name><depth>0</depth><granularDepth>0</granularDepth></port><port><name>log</name><depth>0</depth><granularDepth>0</granularDepth></port></outputPorts><annotations /><activities><activity><raven><group>net.sf.taverna.t2.activities</group><artifact>beanshell-activity</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.activities.beanshell.BeanshellActivity</class><inputMap><map from="groundtruth" to="groundtruth" /><map from="matchbox" to="matchbox" /></inputMap><outputMap><map from="fm" to="fm" /><map from="rec" to="rec" /><map from="md" to="md" /><map from="iid" to="iid" /><map from="log" to="log" /><map from="cid" to="cid" /><map from="prec" to="prec" /></outputMap><configBean encoding="xstream"><net.sf.taverna.t2.activities.beanshell.BeanshellActivityConfigurationBean xmlns="">
46
  <inputs>
47
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
48
      <name>matchbox</name>
49
      <depth>0</depth>
50
      <mimeTypes>
51
        <string>text/plain</string>
52
      </mimeTypes>
53
      <handledReferenceSchemes />
54
      <translatedElementType>java.lang.String</translatedElementType>
55
      <allowsLiteralValues>true</allowsLiteralValues>
56
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
57
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
58
      <name>groundtruth</name>
59
      <depth>0</depth>
60
      <mimeTypes>
61
        <string>text/plain</string>
62
      </mimeTypes>
63
      <handledReferenceSchemes />
64
      <translatedElementType>java.lang.String</translatedElementType>
65
      <allowsLiteralValues>true</allowsLiteralValues>
66
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
67
  </inputs>
68
  <outputs>
69
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
70
      <name>cid</name>
71
      <depth>0</depth>
72
      <mimeTypes />
73
      <granularDepth>0</granularDepth>
74
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
75
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
76
      <name>md</name>
77
      <depth>0</depth>
78
      <mimeTypes />
79
      <granularDepth>0</granularDepth>
80
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
81
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
82
      <name>iid</name>
83
      <depth>0</depth>
84
      <mimeTypes />
85
      <granularDepth>0</granularDepth>
86
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
87
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
88
      <name>prec</name>
89
      <depth>0</depth>
90
      <mimeTypes />
91
      <granularDepth>0</granularDepth>
92
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
93
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
94
      <name>rec</name>
95
      <depth>0</depth>
96
      <mimeTypes />
97
      <granularDepth>0</granularDepth>
98
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
99
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
100
      <name>fm</name>
101
      <depth>0</depth>
102
      <mimeTypes />
103
      <granularDepth>0</granularDepth>
104
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
105
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
106
      <name>log</name>
107
      <depth>0</depth>
108
      <mimeTypes />
109
      <granularDepth>0</granularDepth>
110
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
111
  </outputs>
112
  <classLoaderSharing>workflow</classLoaderSharing>
113
  <localDependencies>
114
    <string>tb-lsdr-matchboxeval-lib-1.0-SNAPSHOT.jar</string>
115
    <string>combinatoricslib-0.2.jar</string>
116
  </localDependencies>
117
  <artifactDependencies />
118
  <script>import eu.scape_project.tb.lsdr.MatchboxEval;

// Build the evaluator from the two input ports and run it.
MatchboxEval me = new MatchboxEval(matchbox, groundtruth);
me.evaluate();

// Copy the evaluator's figures onto the output ports:
// cid = true positives, iid = false positives, md = false negatives.
cid = me.getTruePositives();
iid = me.getFalsePositives();
md = me.getFalseNegatives();
prec = me.getPrecision();
rec = me.getRecall();
fm = me.getFmeasure();

// Evaluator log, exposed on the "log" port.
log = me.getLog();</script>
132
  <dependencies />
133
</net.sf.taverna.t2.activities.beanshell.BeanshellActivityConfigurationBean></configBean><annotations /></activity></activities><dispatchStack><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Parallelize</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig xmlns="">
134
  <maxJobs>1</maxJobs>
135
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ErrorBounce</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Failover</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Retry</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig xmlns="">
136
  <backoffFactor>1.0</backoffFactor>
137
  <initialDelay>1000</initialDelay>
138
  <maxDelay>5000</maxDelay>
139
  <maxRetries>0</maxRetries>
140
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Invoke</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer></dispatchStack><iterationStrategyStack><iteration><strategy><dot><port name="groundtruth" depth="0" /><port name="matchbox" depth="0" /></dot></strategy></iteration></iterationStrategyStack></processor><processor><name>matchbox</name><inputPorts><port><name>barcode_path</name><depth>0</depth></port></inputPorts><outputPorts><port><name>STDERR</name><depth>0</depth><granularDepth>0</granularDepth></port><port><name>STDOUT</name><depth>0</depth><granularDepth>0</granularDepth></port></outputPorts><annotations /><activities><activity><raven><group>net.sf.taverna.t2.activities</group><artifact>external-tool-activity</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.activities.externaltool.ExternalToolActivity</class><inputMap><map from="barcode_path" to="barcode_path" /></inputMap><outputMap><map from="STDERR" to="STDERR" /><map from="STDOUT" to="STDOUT" /></outputMap><configBean encoding="xstream"><net.sf.taverna.t2.activities.externaltool.ExternalToolActivityConfigurationBean xmlns="">
141
  <mechanismType>D0A4CDEB-DD10-4A8E-A49C-8871003083D8</mechanismType>
142
  <mechanismName>fuel</mechanismName>
143
  <mechanismXML>&lt;?xml version="1.0" encoding="UTF-8"?&gt;&#xD;
144
&lt;sshInvocation&gt;&lt;sshNode&gt;&lt;host&gt;fue.onb.ac.at&lt;/host&gt;&lt;port&gt;22&lt;/port&gt;&lt;directory&gt;/tmp/&lt;/directory&gt;&lt;linkCommand&gt;/bin/ln -s %%PATH_TO_ORIGINAL%% %%TARGET_NAME%%&lt;/linkCommand&gt;&lt;copyCommand&gt;/bin/cp %%PATH_TO_ORIGINAL%% %%TARGET_NAME%%&lt;/copyCommand&gt;&lt;/sshNode&gt;&lt;/sshInvocation&gt;&#xD;
145
</mechanismXML>
146
  <externaltoolid>eb15da53-92d0-4dac-aa40-3101b4500997</externaltoolid>
147
  <useCaseDescription>
148
    <usecaseid />
149
    <description />
150
    <!-- Runs FindDuplicates.py on the substituted barcode_path, writing features to a fresh
         per-invocation directory under /tmp. mktemp creates the directory atomically with a
         unique name; the previous version assigned to RANDOM, which bash treats as a seed
         (the variable then expands to a 0-32767 integer), and ignored mkdir/cd failures.
         The shell's own TMPDIR variable is no longer overwritten.
         NOTE(review): barcode_path is substituted verbatim and unquoted, and the feature
         directory is never removed; confirm both are intended. -->
    <command>FEATDIR=$(mktemp -d /tmp/matchboxXXXXXXXXXX) || exit 1
cd /opt/scape/darling/ || exit 1
/usr/bin/python ./FindDuplicates.py %%barcode_path%% --featdir "${FEATDIR}" all</command>
156
    <preparingTimeoutInSeconds>1200</preparingTimeoutInSeconds>
157
    <executionTimeoutInSeconds>1800</executionTimeoutInSeconds>
158
    <tags>
159
      <string>barcode_path</string>
160
    </tags>
161
    <REs />
162
    <queue__preferred />
163
    <queue__deny />
164
    <static__inputs />
165
    <inputs>
166
      <entry>
167
        <string>barcode_path</string>
168
        <de.uni__luebeck.inb.knowarc.usecases.ScriptInputUser>
169
          <tag>barcode_path</tag>
170
          <file>false</file>
171
          <tempFile>false</tempFile>
172
          <binary>false</binary>
173
          <charsetName>UTF-8</charsetName>
174
          <forceCopy>false</forceCopy>
175
          <list>false</list>
176
          <concatenate>false</concatenate>
177
          <mime />
178
        </de.uni__luebeck.inb.knowarc.usecases.ScriptInputUser>
179
      </entry>
180
    </inputs>
181
    <outputs />
182
    <includeStdIn>false</includeStdIn>
183
    <includeStdOut>true</includeStdOut>
184
    <includeStdErr>true</includeStdErr>
185
    <validReturnCodes>
186
      <int>0</int>
187
    </validReturnCodes>
188
  </useCaseDescription>
189
  <edited>false</edited>
190
</net.sf.taverna.t2.activities.externaltool.ExternalToolActivityConfigurationBean></configBean><annotations /></activity></activities><dispatchStack><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Parallelize</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig xmlns="">
191
  <maxJobs>4</maxJobs>
192
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ErrorBounce</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Failover</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Retry</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig xmlns="">
193
  <backoffFactor>1.0</backoffFactor>
194
  <initialDelay>1000</initialDelay>
195
  <maxDelay>5000</maxDelay>
196
  <maxRetries>0</maxRetries>
197
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Invoke</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer></dispatchStack><iterationStrategyStack><iteration><strategy><cross><port name="barcode_path" depth="0" /></cross></strategy></iteration></iterationStrategyStack></processor><processor><name>read_bc_dirlist_file</name><inputPorts><port><name>fileurl</name><depth>0</depth></port></inputPorts><outputPorts><port><name>filecontents</name><depth>0</depth><granularDepth>0</granularDepth></port></outputPorts><annotations /><activities><activity><raven><group>net.sf.taverna.t2.activities</group><artifact>localworker-activity</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.activities.localworker.LocalworkerActivity</class><inputMap><map from="fileurl" to="fileurl" /></inputMap><outputMap><map from="filecontents" to="filecontents" /></outputMap><configBean encoding="xstream"><net.sf.taverna.t2.activities.localworker.LocalworkerActivityConfigurationBean xmlns="">
198
  <inputs>
199
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
200
      <name>fileurl</name>
201
      <depth>0</depth>
202
      <mimeTypes>
203
        <string>'text/plain'</string>
204
      </mimeTypes>
205
      <handledReferenceSchemes />
206
      <translatedElementType>java.lang.String</translatedElementType>
207
      <allowsLiteralValues>true</allowsLiteralValues>
208
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
209
  </inputs>
210
  <outputs>
211
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
212
      <name>filecontents</name>
213
      <depth>0</depth>
214
      <mimeTypes>
215
        <string>'text/plain'</string>
216
      </mimeTypes>
217
      <granularDepth>0</granularDepth>
218
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
219
  </outputs>
220
  <classLoaderSharing>workflow</classLoaderSharing>
221
  <localDependencies />
222
  <artifactDependencies />
223
  <script>// Reads the text resource named by the "fileurl" port into the "filecontents" port.
// Every line read is re-terminated with the platform line separator.

// Opens fileUrl as a local file; if no such file exists, retries it as a URL.
BufferedReader getReader (String fileUrl) throws IOException {
  InputStreamReader reader;
  try {
    reader = new FileReader(fileUrl);
  }
  catch (FileNotFoundException e) {
    // try a real URL instead
    URL url = new URL(fileUrl);
    reader = new InputStreamReader (url.openStream());
  }
  return new BufferedReader(reader);
}

StringBuffer sb = new StringBuffer(4000);
String lineEnding = System.getProperty("line.separator");

BufferedReader in = getReader(fileurl);
try {
  String str;
  while ((str = in.readLine()) != null) {
    sb.append(str);
    sb.append(lineEnding);
  }
}
finally {
  // Release the file/URL stream even when readLine() throws.
  in.close();
}
filecontents = sb.toString();
</script>
252
  <dependencies />
253
  <localworkerName>net.sourceforge.taverna.scuflworkers.io.TextFileReader</localworkerName>
254
</net.sf.taverna.t2.activities.localworker.LocalworkerActivityConfigurationBean></configBean><annotations /></activity></activities><dispatchStack><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Parallelize</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig xmlns="">
255
  <maxJobs>1</maxJobs>
256
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ErrorBounce</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Failover</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Retry</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig xmlns="">
257
  <backoffFactor>1.0</backoffFactor>
258
  <initialDelay>1000</initialDelay>
259
  <maxDelay>5000</maxDelay>
260
  <maxRetries>0</maxRetries>
261
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Invoke</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer></dispatchStack><iterationStrategyStack><iteration><strategy><cross><port name="fileurl" depth="0" /></cross></strategy></iteration></iterationStrategyStack></processor><processor><name>split_bc_dirlist</name><inputPorts><port><name>regex</name><depth>0</depth></port><port><name>string</name><depth>0</depth></port></inputPorts><outputPorts><port><name>split</name><depth>1</depth><granularDepth>1</granularDepth></port></outputPorts><annotations /><activities><activity><raven><group>net.sf.taverna.t2.activities</group><artifact>localworker-activity</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.activities.localworker.LocalworkerActivity</class><inputMap><map from="regex" to="regex" /><map from="string" to="string" /></inputMap><outputMap><map from="split" to="split" /></outputMap><configBean encoding="xstream"><net.sf.taverna.t2.activities.localworker.LocalworkerActivityConfigurationBean xmlns="">
262
  <inputs>
263
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
264
      <name>string</name>
265
      <depth>0</depth>
266
      <mimeTypes>
267
        <string>'text/plain'</string>
268
      </mimeTypes>
269
      <handledReferenceSchemes />
270
      <translatedElementType>java.lang.String</translatedElementType>
271
      <allowsLiteralValues>true</allowsLiteralValues>
272
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
273
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
274
      <name>regex</name>
275
      <depth>0</depth>
276
      <mimeTypes>
277
        <string>'text/plain'</string>
278
      </mimeTypes>
279
      <handledReferenceSchemes />
280
      <translatedElementType>java.lang.String</translatedElementType>
281
      <allowsLiteralValues>true</allowsLiteralValues>
282
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
283
  </inputs>
284
  <outputs>
285
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
286
      <name>split</name>
287
      <depth>1</depth>
288
      <mimeTypes>
289
        <string>l('text/plain')</string>
290
      </mimeTypes>
291
      <granularDepth>1</granularDepth>
292
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
293
  </outputs>
294
  <classLoaderSharing>workflow</classLoaderSharing>
295
  <localDependencies />
296
  <artifactDependencies />
297
  <script>// Splits the "string" port on the "regex" port and publishes the pieces as the "split" list.
// When "regex" is not supplied (void) the separator defaults to ",".
// An empty input string produces an empty list.
// NOTE(review): if the input was assembled with CR+LF line endings and the regex only
// matches LF, every piece keeps a trailing CR; confirm against the upstream processors.
List split = new ArrayList();
if (!string.equals("")) {
  String regexString = ",";
  if (regex != void) {
      regexString = regex;
  }
  String[] result = string.split(regexString);
  for (int i = 0; i &lt; result.length; i++) {
      split.add(result[i]);
  }
}
</script>
309
  <dependencies />
310
  <localworkerName>org.embl.ebi.escience.scuflworkers.java.SplitByRegex</localworkerName>
311
</net.sf.taverna.t2.activities.localworker.LocalworkerActivityConfigurationBean></configBean><annotations /></activity></activities><dispatchStack><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Parallelize</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig xmlns="">
312
  <maxJobs>1</maxJobs>
313
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ErrorBounce</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Failover</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Retry</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig xmlns="">
314
  <backoffFactor>1.0</backoffFactor>
315
  <initialDelay>1000</initialDelay>
316
  <maxDelay>5000</maxDelay>
317
  <maxRetries>0</maxRetries>
318
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Invoke</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer></dispatchStack><iterationStrategyStack><iteration><strategy><cross><port name="regex" depth="0" /><port name="string" depth="0" /></cross></strategy></iteration></iterationStrategyStack></processor><processor><name>newline</name><inputPorts /><outputPorts><port><name>value</name><depth>0</depth><granularDepth>0</granularDepth></port></outputPorts><annotations /><activities><activity><raven><group>net.sf.taverna.t2.activities</group><artifact>stringconstant-activity</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.activities.stringconstant.StringConstantActivity</class><inputMap /><outputMap><map from="value" to="value" /></outputMap><configBean encoding="xstream"><net.sf.taverna.t2.activities.stringconstant.StringConstantConfigurationBean xmlns="">
319
  <value>\n</value>
320
</net.sf.taverna.t2.activities.stringconstant.StringConstantConfigurationBean></configBean><annotations /></activity></activities><dispatchStack><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Parallelize</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig xmlns="">
321
  <maxJobs>1</maxJobs>
322
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ErrorBounce</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Failover</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Retry</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig xmlns="">
323
  <backoffFactor>1.0</backoffFactor>
324
  <initialDelay>1000</initialDelay>
325
  <maxDelay>5000</maxDelay>
326
  <maxRetries>0</maxRetries>
327
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Invoke</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer></dispatchStack><iterationStrategyStack><iteration><strategy /></iteration></iterationStrategyStack></processor><processor><name>parse_matchbox_stdout</name><inputPorts><port><name>matchbox_stdout</name><depth>0</depth></port><port><name>barcode_path</name><depth>0</depth></port></inputPorts><outputPorts><port><name>duplicates_result</name><depth>0</depth><granularDepth>0</granularDepth></port><port><name>duplicates_matches</name><depth>0</depth><granularDepth>0</granularDepth></port></outputPorts><annotations /><activities><activity><raven><group>net.sf.taverna.t2.activities</group><artifact>beanshell-activity</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.activities.beanshell.BeanshellActivity</class><inputMap><map from="barcode_path" to="barcode_path" /><map from="matchbox_stdout" to="matchbox_stdout" /></inputMap><outputMap><map from="duplicates_result" to="duplicates_result" /><map from="duplicates_matches" to="duplicates_matches" /></outputMap><configBean encoding="xstream"><net.sf.taverna.t2.activities.beanshell.BeanshellActivityConfigurationBean xmlns="">
328
  <inputs>
329
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
330
      <name>barcode_path</name>
331
      <depth>0</depth>
332
      <mimeTypes>
333
        <string>text/plain</string>
334
      </mimeTypes>
335
      <handledReferenceSchemes />
336
      <translatedElementType>java.lang.String</translatedElementType>
337
      <allowsLiteralValues>true</allowsLiteralValues>
338
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
339
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
340
      <name>matchbox_stdout</name>
341
      <depth>0</depth>
342
      <mimeTypes>
343
        <string>text/plain</string>
344
      </mimeTypes>
345
      <handledReferenceSchemes />
346
      <translatedElementType>java.lang.String</translatedElementType>
347
      <allowsLiteralValues>true</allowsLiteralValues>
348
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
349
  </inputs>
350
  <outputs>
351
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
352
      <name>duplicates_result</name>
353
      <depth>0</depth>
354
      <mimeTypes />
355
      <granularDepth>0</granularDepth>
356
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
357
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
358
      <name>duplicates_matches</name>
359
      <depth>0</depth>
360
      <mimeTypes />
361
      <granularDepth>0</granularDepth>
362
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
363
  </outputs>
364
  <classLoaderSharing>workflow</classLoaderSharing>
365
  <localDependencies />
366
  <artifactDependencies />
367
  <script>#!Pairtree pt = new Pairtree();
#!String id = pt.mapToId("/mnt/abonas/linktree/", barcode_path.substring(0,barcode_path.lastIndexOf("/")));
// The two lines above are a disabled Pairtree lookup, left in place behind "#!".
//
// Scans matchbox_stdout line by line. Once the "=== List of detected duplicates ==="
// marker has been seen, every later line containing "=>" is collected.
//   duplicates_result  : barcode_path + ":" followed by the collected lines, one per line
//   duplicates_matches : barcode_path + TAB + "1" if any line was collected, otherwise "0"
String resultHeader = barcode_path + ":\n";
StringBuilder resultText = new StringBuilder(resultHeader);
boolean startDuplicates = false;
boolean hasDuplicates = false;

StringTokenizer st = new StringTokenizer(matchbox_stdout, "\n");
while (st.hasMoreTokens()) {
    String token = st.nextToken();
    if (startDuplicates) {
        if (token.contains("=&gt;")) {
            resultText.append(token).append("\n");
            hasDuplicates = true;
        }
    }
    // Checked after the collection step so the marker line itself is never collected.
    if (token.contains("=== List of detected duplicates ===")) {
        startDuplicates = true;
    }
}

String duplicates_result = resultText.toString();
String duplicates_matches = barcode_path + "\t" + (hasDuplicates ? "1" : "0");</script>
393
  <dependencies />
394
</net.sf.taverna.t2.activities.beanshell.BeanshellActivityConfigurationBean></configBean><annotations /></activity></activities><dispatchStack><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Parallelize</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig xmlns="">
395
  <maxJobs>1</maxJobs>
396
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ErrorBounce</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Failover</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Retry</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig xmlns="">
397
  <backoffFactor>1.0</backoffFactor>
398
  <initialDelay>1000</initialDelay>
399
  <maxDelay>5000</maxDelay>
400
  <maxRetries>0</maxRetries>
401
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Invoke</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer></dispatchStack><iterationStrategyStack><iteration><strategy><dot><port name="matchbox_stdout" depth="0" /><port name="barcode_path" depth="0" /></dot></strategy></iteration></iterationStrategyStack></processor><processor><name>read_gt_filelist_file</name><inputPorts><port><name>fileurl</name><depth>0</depth></port></inputPorts><outputPorts><port><name>filecontents</name><depth>0</depth><granularDepth>0</granularDepth></port></outputPorts><annotations /><activities><activity><raven><group>net.sf.taverna.t2.activities</group><artifact>localworker-activity</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.activities.localworker.LocalworkerActivity</class><inputMap><map from="fileurl" to="fileurl" /></inputMap><outputMap><map from="filecontents" to="filecontents" /></outputMap><configBean encoding="xstream"><net.sf.taverna.t2.activities.localworker.LocalworkerActivityConfigurationBean xmlns="">
402
  <inputs>
403
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
404
      <name>fileurl</name>
405
      <depth>0</depth>
406
      <mimeTypes>
407
        <string>'text/plain'</string>
408
      </mimeTypes>
409
      <handledReferenceSchemes />
410
      <translatedElementType>java.lang.String</translatedElementType>
411
      <allowsLiteralValues>true</allowsLiteralValues>
412
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
413
  </inputs>
414
  <outputs>
415
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
416
      <name>filecontents</name>
417
      <depth>0</depth>
418
      <mimeTypes>
419
        <string>'text/plain'</string>
420
      </mimeTypes>
421
      <granularDepth>0</granularDepth>
422
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
423
  </outputs>
424
  <classLoaderSharing>workflow</classLoaderSharing>
425
  <localDependencies />
426
  <artifactDependencies />
427
  <script>BufferedReader getReader (String fileUrl) throws IOException {
428
      InputStreamReader reader;
429
      try {
430
          reader = new FileReader(fileUrl);
431
      }
432
      catch (FileNotFoundException e) {
433
          // try a real URL instead
434
          URL url = new URL(fileUrl);
435
          reader = new InputStreamReader (url.openStream());
436
      }
437
      return new BufferedReader(reader);
438
  }
439
440
441
442
StringBuffer sb = new StringBuffer(4000);
443
444
BufferedReader in = getReader(fileurl);
445
String str;
446
String lineEnding = System.getProperty("line.separator");
447
448
while ((str = in.readLine()) != null) {
449
  sb.append(str);
450
  sb.append(lineEnding);
451
}
452
in.close();
453
filecontents = sb.toString();
454
  
455
</script>
456
  <dependencies />
457
  <localworkerName>net.sourceforge.taverna.scuflworkers.io.TextFileReader</localworkerName>
458
</net.sf.taverna.t2.activities.localworker.LocalworkerActivityConfigurationBean></configBean><annotations /></activity></activities><dispatchStack><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Parallelize</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig xmlns="">
459
  <maxJobs>1</maxJobs>
460
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ErrorBounce</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Failover</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Retry</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig xmlns="">
461
  <backoffFactor>1.0</backoffFactor>
462
  <initialDelay>1000</initialDelay>
463
  <maxDelay>5000</maxDelay>
464
  <maxRetries>0</maxRetries>
465
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Invoke</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer></dispatchStack><iterationStrategyStack><iteration><strategy><cross><port name="fileurl" depth="0" /></cross></strategy></iteration></iterationStrategyStack></processor><processor><name>split_gt_filelist</name><inputPorts><port><name>string</name><depth>0</depth></port><port><name>regex</name><depth>0</depth></port></inputPorts><outputPorts><port><name>split</name><depth>1</depth><granularDepth>1</granularDepth></port></outputPorts><annotations /><activities><activity><raven><group>net.sf.taverna.t2.activities</group><artifact>localworker-activity</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.activities.localworker.LocalworkerActivity</class><inputMap><map from="regex" to="regex" /><map from="string" to="string" /></inputMap><outputMap><map from="split" to="split" /></outputMap><configBean encoding="xstream"><net.sf.taverna.t2.activities.localworker.LocalworkerActivityConfigurationBean xmlns="">
466
  <inputs>
467
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
468
      <name>string</name>
469
      <depth>0</depth>
470
      <mimeTypes>
471
        <string>'text/plain'</string>
472
      </mimeTypes>
473
      <handledReferenceSchemes />
474
      <translatedElementType>java.lang.String</translatedElementType>
475
      <allowsLiteralValues>true</allowsLiteralValues>
476
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
477
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
478
      <name>regex</name>
479
      <depth>0</depth>
480
      <mimeTypes>
481
        <string>'text/plain'</string>
482
      </mimeTypes>
483
      <handledReferenceSchemes />
484
      <translatedElementType>java.lang.String</translatedElementType>
485
      <allowsLiteralValues>true</allowsLiteralValues>
486
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
487
  </inputs>
488
  <outputs>
489
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
490
      <name>split</name>
491
      <depth>1</depth>
492
      <mimeTypes>
493
        <string>l('text/plain')</string>
494
      </mimeTypes>
495
      <granularDepth>1</granularDepth>
496
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
497
  </outputs>
498
  <classLoaderSharing>workflow</classLoaderSharing>
499
  <localDependencies />
500
  <artifactDependencies />
501
  <script>List split = new ArrayList();
502
if (!string.equals("")) {
503
  String regexString = ",";
504
  if (regex != void) {
505
      regexString = regex;
506
  }
507
  String[] result = string.split(regexString);
508
  for (int i = 0; i &lt; result.length; i++) {
509
      split.add(result[i]);
510
  }
511
}
512
</script>
513
  <dependencies />
514
  <localworkerName>org.embl.ebi.escience.scuflworkers.java.SplitByRegex</localworkerName>
515
</net.sf.taverna.t2.activities.localworker.LocalworkerActivityConfigurationBean></configBean><annotations /></activity></activities><dispatchStack><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Parallelize</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig xmlns="">
516
  <maxJobs>1</maxJobs>
517
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ErrorBounce</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Failover</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Retry</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig xmlns="">
518
  <backoffFactor>1.0</backoffFactor>
519
  <initialDelay>1000</initialDelay>
520
  <maxDelay>5000</maxDelay>
521
  <maxRetries>0</maxRetries>
522
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Invoke</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer></dispatchStack><iterationStrategyStack><iteration><strategy><cross><port name="string" depth="0" /><port name="regex" depth="0" /></cross></strategy></iteration></iterationStrategyStack></processor><processor><name>read_gt_file</name><inputPorts><port><name>fileurl</name><depth>0</depth></port></inputPorts><outputPorts><port><name>filecontents</name><depth>0</depth><granularDepth>0</granularDepth></port></outputPorts><annotations /><activities><activity><raven><group>net.sf.taverna.t2.activities</group><artifact>localworker-activity</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.activities.localworker.LocalworkerActivity</class><inputMap><map from="fileurl" to="fileurl" /></inputMap><outputMap><map from="filecontents" to="filecontents" /></outputMap><configBean encoding="xstream"><net.sf.taverna.t2.activities.localworker.LocalworkerActivityConfigurationBean xmlns="">
523
  <inputs>
524
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
525
      <name>fileurl</name>
526
      <depth>0</depth>
527
      <mimeTypes>
528
        <string>'text/plain'</string>
529
      </mimeTypes>
530
      <handledReferenceSchemes />
531
      <translatedElementType>java.lang.String</translatedElementType>
532
      <allowsLiteralValues>true</allowsLiteralValues>
533
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
534
  </inputs>
535
  <outputs>
536
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
537
      <name>filecontents</name>
538
      <depth>0</depth>
539
      <mimeTypes>
540
        <string>'text/plain'</string>
541
      </mimeTypes>
542
      <granularDepth>0</granularDepth>
543
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
544
  </outputs>
545
  <classLoaderSharing>workflow</classLoaderSharing>
546
  <localDependencies />
547
  <artifactDependencies />
548
  <script>BufferedReader getReader (String fileUrl) throws IOException {
549
      InputStreamReader reader;
550
      try {
551
          reader = new FileReader(fileUrl);
552
      }
553
      catch (FileNotFoundException e) {
554
          // try a real URL instead
555
          URL url = new URL(fileUrl);
556
          reader = new InputStreamReader (url.openStream());
557
      }
558
      return new BufferedReader(reader);
559
  }
560
561
562
563
StringBuffer sb = new StringBuffer(4000);
564
565
BufferedReader in = getReader(fileurl);
566
String str;
567
String lineEnding = System.getProperty("line.separator");
568
569
while ((str = in.readLine()) != null) {
570
  sb.append(str);
571
  sb.append(lineEnding);
572
}
573
in.close();
574
filecontents = sb.toString();
575
  
576
</script>
577
  <dependencies />
578
  <localworkerName>net.sourceforge.taverna.scuflworkers.io.TextFileReader</localworkerName>
579
</net.sf.taverna.t2.activities.localworker.LocalworkerActivityConfigurationBean></configBean><annotations /></activity></activities><dispatchStack><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Parallelize</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig xmlns="">
580
  <maxJobs>1</maxJobs>
581
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ErrorBounce</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Failover</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Retry</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig xmlns="">
582
  <backoffFactor>1.0</backoffFactor>
583
  <initialDelay>1000</initialDelay>
584
  <maxDelay>5000</maxDelay>
585
  <maxRetries>0</maxRetries>
586
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Invoke</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer></dispatchStack><iterationStrategyStack><iteration><strategy><cross><port name="fileurl" depth="0" /></cross></strategy></iteration></iterationStrategyStack></processor><processor><name>report</name><inputPorts><port><name>cid</name><depth>0</depth></port><port><name>fm</name><depth>0</depth></port><port><name>iid</name><depth>0</depth></port><port><name>md</name><depth>0</depth></port><port><name>prec</name><depth>0</depth></port><port><name>rec</name><depth>0</depth></port></inputPorts><outputPorts><port><name>report</name><depth>0</depth><granularDepth>0</granularDepth></port></outputPorts><annotations /><activities><activity><raven><group>net.sf.taverna.t2.activities</group><artifact>beanshell-activity</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.activities.beanshell.BeanshellActivity</class><inputMap><map from="fm" to="fm" /><map from="rec" to="rec" /><map from="md" to="md" /><map from="iid" to="iid" /><map from="cid" to="cid" /><map from="prec" to="prec" /></inputMap><outputMap><map from="report" to="report" /></outputMap><configBean encoding="xstream"><net.sf.taverna.t2.activities.beanshell.BeanshellActivityConfigurationBean xmlns="">
587
  <inputs>
588
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
589
      <name>prec</name>
590
      <depth>0</depth>
591
      <mimeTypes>
592
        <string>text/plain</string>
593
      </mimeTypes>
594
      <handledReferenceSchemes />
595
      <translatedElementType>java.lang.String</translatedElementType>
596
      <allowsLiteralValues>true</allowsLiteralValues>
597
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
598
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
599
      <name>md</name>
600
      <depth>0</depth>
601
      <mimeTypes>
602
        <string>text/plain</string>
603
      </mimeTypes>
604
      <handledReferenceSchemes />
605
      <translatedElementType>java.lang.String</translatedElementType>
606
      <allowsLiteralValues>true</allowsLiteralValues>
607
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
608
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
609
      <name>iid</name>
610
      <depth>0</depth>
611
      <mimeTypes>
612
        <string>text/plain</string>
613
      </mimeTypes>
614
      <handledReferenceSchemes />
615
      <translatedElementType>java.lang.String</translatedElementType>
616
      <allowsLiteralValues>true</allowsLiteralValues>
617
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
618
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
619
      <name>fm</name>
620
      <depth>0</depth>
621
      <mimeTypes>
622
        <string>text/plain</string>
623
      </mimeTypes>
624
      <handledReferenceSchemes />
625
      <translatedElementType>java.lang.String</translatedElementType>
626
      <allowsLiteralValues>true</allowsLiteralValues>
627
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
628
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
629
      <name>cid</name>
630
      <depth>0</depth>
631
      <mimeTypes>
632
        <string>text/plain</string>
633
      </mimeTypes>
634
      <handledReferenceSchemes />
635
      <translatedElementType>java.lang.String</translatedElementType>
636
      <allowsLiteralValues>true</allowsLiteralValues>
637
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
638
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
639
      <name>rec</name>
640
      <depth>0</depth>
641
      <mimeTypes>
642
        <string>text/plain</string>
643
      </mimeTypes>
644
      <handledReferenceSchemes />
645
      <translatedElementType>java.lang.String</translatedElementType>
646
      <allowsLiteralValues>true</allowsLiteralValues>
647
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityInputPortDefinitionBean>
648
  </inputs>
649
  <outputs>
650
    <net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
651
      <name>report</name>
652
      <depth>0</depth>
653
      <mimeTypes />
654
      <granularDepth>0</granularDepth>
655
    </net.sf.taverna.t2.workflowmodel.processor.activity.config.ActivityOutputPortDefinitionBean>
656
  </outputs>
657
  <classLoaderSharing>workflow</classLoaderSharing>
658
  <localDependencies />
659
  <artifactDependencies />
660
  <script>report = "Correctly identified duplicates (true positives): "+cid+"\n"+
661
"Incorrectly identified duplicates (false positives): "+iid+"\n"+
662
"Missed duplicates (false negatives): "+md+"\n"+
663
"Precision: "+prec+"%\n"+
664
"Recall: "+rec+"%\n"+
665
"F-Measure: "+fm+"%\n";</script>
666
  <dependencies />
667
</net.sf.taverna.t2.activities.beanshell.BeanshellActivityConfigurationBean></configBean><annotations /></activity></activities><dispatchStack><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Parallelize</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig xmlns="">
668
  <maxJobs>1</maxJobs>
669
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ParallelizeConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.ErrorBounce</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Failover</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Retry</class><configBean encoding="xstream"><net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig xmlns="">
670
  <backoffFactor>1.0</backoffFactor>
671
  <initialDelay>1000</initialDelay>
672
  <maxDelay>5000</maxDelay>
673
  <maxRetries>0</maxRetries>
674
</net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.RetryConfig></configBean></dispatchLayer><dispatchLayer><raven><group>net.sf.taverna.t2.core</group><artifact>workflowmodel-impl</artifact><version>1.4</version></raven><class>net.sf.taverna.t2.workflowmodel.processor.dispatch.layers.Invoke</class><configBean encoding="xstream"><null xmlns="" /></configBean></dispatchLayer></dispatchStack><iterationStrategyStack><iteration><strategy><dot><port name="cid" depth="0" /><port name="fm" depth="0" /><port name="iid" depth="0" /><port name="md" depth="0" /><port name="prec" depth="0" /><port name="rec" depth="0" /></dot></strategy></iteration></iterationStrategyStack></processor></processors><conditions /><datalinks><datalink><sink type="processor"><processor>matchbox_evaluate</processor><port>groundtruth</port></sink><source type="processor"><processor>read_gt_file</processor><port>filecontents</port></source></datalink><datalink><sink type="processor"><processor>matchbox_evaluate</processor><port>matchbox</port></sink><source type="processor"><processor>parse_matchbox_stdout</processor><port>duplicates_result</port></source></datalink><datalink><sink type="processor"><processor>matchbox</processor><port>barcode_path</port></sink><source type="processor"><processor>split_bc_dirlist</processor><port>split</port></source></datalink><datalink><sink type="processor"><processor>read_bc_dirlist_file</processor><port>fileurl</port></sink><source type="dataflow"><port>bc_dirlist_file_path</port></source></datalink><datalink><sink type="processor"><processor>split_bc_dirlist</processor><port>regex</port></sink><source type="processor"><processor>newline</processor><port>value</port></source></datalink><datalink><sink type="processor"><processor>split_bc_dirlist</processor><port>string</port></sink><source type="processor"><processor>read_bc_dirlist_file</processor><port>filecontents</port></source></datalink><datalink><sink 
type="processor"><processor>parse_matchbox_stdout</processor><port>matchbox_stdout</port></sink><source type="processor"><processor>matchbox</processor><port>STDOUT</port></source></datalink><datalink><sink type="processor"><processor>parse_matchbox_stdout</processor><port>barcode_path</port></sink><source type="processor"><processor>split_bc_dirlist</processor><port>split</port></source></datalink><datalink><sink type="processor"><processor>read_gt_filelist_file</processor><port>fileurl</port></sink><source type="dataflow"><port>gt_filelist_path</port></source></datalink><datalink><sink type="processor"><processor>split_gt_filelist</processor><port>string</port></sink><source type="processor"><processor>read_gt_filelist_file</processor><port>filecontents</port></source></datalink><datalink><sink type="processor"><processor>split_gt_filelist</processor><port>regex</port></sink><source type="processor"><processor>newline</processor><port>value</port></source></datalink><datalink><sink type="processor"><processor>read_gt_file</processor><port>fileurl</port></sink><source type="processor"><processor>split_gt_filelist</processor><port>split</port></source></datalink><datalink><sink type="processor"><processor>report</processor><port>cid</port></sink><source type="processor"><processor>matchbox_evaluate</processor><port>cid</port></source></datalink><datalink><sink type="processor"><processor>report</processor><port>fm</port></sink><source type="processor"><processor>matchbox_evaluate</processor><port>fm</port></source></datalink><datalink><sink type="processor"><processor>report</processor><port>iid</port></sink><source type="processor"><processor>matchbox_evaluate</processor><port>iid</port></source></datalink><datalink><sink type="processor"><processor>report</processor><port>md</port></sink><source type="processor"><processor>matchbox_evaluate</processor><port>md</port></source></datalink><datalink><sink 
type="processor"><processor>report</processor><port>prec</port></sink><source type="processor"><processor>matchbox_evaluate</processor><port>prec</port></source></datalink><datalink><sink type="processor"><processor>report</processor><port>rec</port></sink><source type="processor"><processor>matchbox_evaluate</processor><port>rec</port></source></datalink><datalink><sink type="dataflow"><port>results</port></sink><source type="processor"><processor>parse_matchbox_stdout</processor><port>duplicates_result</port></source></datalink><datalink><sink type="dataflow"><port>stderr</port></sink><source type="processor"><processor>matchbox</processor><port>STDERR</port></source></datalink><datalink><sink type="dataflow"><port>stdout</port></sink><source type="processor"><processor>matchbox</processor><port>STDOUT</port></source></datalink><datalink><sink type="dataflow"><port>matches</port></sink><source type="processor"><processor>parse_matchbox_stdout</processor><port>duplicates_matches</port></source></datalink><datalink><sink type="dataflow"><port>report</port></sink><source type="processor"><processor>report</processor><port>report</port></source></datalink><datalink><sink type="dataflow"><port>evlog</port></sink><source type="processor"><processor>matchbox_evaluate</processor><port>log</port></source></datalink></datalinks><annotations><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
675
  <annotationAssertions>
676
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
677
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
678
        <identification>696a6a59-a428-4203-9160-0867acbb4f3a</identification>
679
      </annotationBean>
680
      <date>2012-10-01 13:32:02.56 UTC</date>
681
      <creators />
682
      <curationEventList />
683
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
684
  </annotationAssertions>
685
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
686
  <annotationAssertions>
687
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
688
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
689
        <identification>79894ee6-b109-41b7-95db-8178b7eb4121</identification>
690
      </annotationBean>
691
      <date>2012-10-02 09:56:58.139 UTC</date>
692
      <creators />
693
      <curationEventList />
694
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
695
  </annotationAssertions>
696
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
697
  <annotationAssertions>
698
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
699
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
700
        <identification>66473d1e-e539-497e-874b-28f42198a8f3</identification>
701
      </annotationBean>
702
      <date>2012-09-26 15:18:43.167 UTC</date>
703
      <creators />
704
      <curationEventList />
705
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
706
  </annotationAssertions>
707
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
708
  <annotationAssertions>
709
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
710
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
711
        <identification>801043a1-195a-4b7d-8dd4-c1ebfa8d3b41</identification>
712
      </annotationBean>
713
      <date>2012-10-01 12:37:43.3 UTC</date>
714
      <creators />
715
      <curationEventList />
716
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
717
  </annotationAssertions>
718
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
719
  <annotationAssertions>
720
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
721
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.DescriptiveTitle">
722
        <text>Matchbox Evaluation</text>
723
      </annotationBean>
724
      <date>2012-10-02 09:38:19.704 UTC</date>
725
      <creators />
726
      <curationEventList />
727
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
728
  </annotationAssertions>
729
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
730
  <annotationAssertions>
731
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
732
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
733
        <identification>e26a0bd8-eb71-4fd3-9bc6-238d71792ed3</identification>
734
      </annotationBean>
735
      <date>2012-10-01 12:46:11.91 UTC</date>
736
      <creators />
737
      <curationEventList />
738
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
739
  </annotationAssertions>
740
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
741
  <annotationAssertions>
742
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
743
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
744
        <identification>0ce1f37b-c130-4473-996b-2e88763b69fc</identification>
745
      </annotationBean>
746
      <date>2012-10-01 13:09:53.436 UTC</date>
747
      <creators />
748
      <curationEventList />
749
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
750
  </annotationAssertions>
751
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
752
  <annotationAssertions>
753
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
754
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
755
        <identification>a0ac15b5-549d-48f0-8d8d-dafb284f2f7b</identification>
756
      </annotationBean>
757
      <date>2012-10-01 13:28:45.414 UTC</date>
758
      <creators />
759
      <curationEventList />
760
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
761
  </annotationAssertions>
762
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
763
  <annotationAssertions>
764
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
765
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.FreeTextDescription">
766
        <text>Matchbox evaluation against ground truth. The evaluation process first
767
creates the matchbox output and ground truth lists. It then counts each page
768
tuple from the matchbox output that is in the ground truth as a correctly
769
identified tuple (true positive). Those that are not in the ground truth are
770
counted as incorrectly identified tuples (false positives), and finally,
771
those that are in the ground truth but not in the matchbox output are counted
772
as missed tuples (false negatives).
773
The precision is then calculated as the number of true positives (i.e. the 
774
number of items correctly labeled as duplicate page pairs) divided by the 
775
total number of elements assumed to be duplicate page pairs (i.e. the sum of 
776
true positives and false positives, which are items incorrectly labeled as 
777
being duplicate page pairs). Recall is then defined as the number of 
778
true positives divided by the total number of elements of duplicate page 
779
pairs (i.e. the sum of true positives and false negatives, which are items that 
780
have not been labeled as being duplicate page pairs but actually should have 
781
been).
782
The ground truth contains single page instances without duplicates and 
783
n-tuples (duplicates, triples, quadruples, etc.). n-tuples with n&gt;2 are 
784
expanded, the result is a list of 2-tuples which is used to determine the
785
number of missed duplicates (false negatives).</text>
786
      </annotationBean>
787
      <date>2012-10-02 12:36:08.934 UTC</date>
788
      <creators />
789
      <curationEventList />
790
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
791
  </annotationAssertions>
792
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
793
  <annotationAssertions>
794
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
795
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
796
        <identification>6d1c206f-f442-49d3-97b3-50cd4ac66b5f</identification>
797
      </annotationBean>
798
      <date>2012-09-26 15:20:31.846 UTC</date>
799
      <creators />
800
      <curationEventList />
801
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
802
  </annotationAssertions>
803
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
804
  <annotationAssertions>
805
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
806
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
807
        <identification>2a9e3405-7051-4feb-ba82-9ec74af23557</identification>
808
      </annotationBean>
809
      <date>2012-10-01 12:38:49.60 UTC</date>
810
      <creators />
811
      <curationEventList />
812
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
813
  </annotationAssertions>
814
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
815
  <annotationAssertions>
816
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
817
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.Author">
818
        <text>Sven Schlarb</text>
819
      </annotationBean>
820
      <date>2012-10-02 12:36:16.119 UTC</date>
821
      <creators />
822
      <curationEventList />
823
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
824
  </annotationAssertions>
825
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
826
  <annotationAssertions>
827
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
828
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
829
        <identification>5b2b5686-2a2c-4b25-a525-3d5c49ce8e48</identification>
830
      </annotationBean>
831
      <date>2012-10-02 11:32:35.237 UTC</date>
832
      <creators />
833
      <curationEventList />
834
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
835
  </annotationAssertions>
836
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
837
  <annotationAssertions>
838
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
839
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
840
        <identification>24c30e8d-a759-497e-ae9a-4c2c6cfb7bcf</identification>
841
      </annotationBean>
842
      <date>2012-10-01 12:40:56.91 UTC</date>
843
      <creators />
844
      <curationEventList />
845
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
846
  </annotationAssertions>
847
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
848
  <annotationAssertions>
849
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
850
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
851
        <identification>eaa27b27-7bc6-4461-875e-499f30946c6a</identification>
852
      </annotationBean>
853
      <date>2013-07-23 13:59:10.99 UTC</date>
854
      <creators />
855
      <curationEventList />
856
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
857
  </annotationAssertions>
858
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
859
  <annotationAssertions>
860
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
861
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
862
        <identification>506fc52e-7052-4f57-8c15-83821665c03e</identification>
863
      </annotationBean>
864
      <date>2012-10-02 09:56:29.495 UTC</date>
865
      <creators />
866
      <curationEventList />
867
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
868
  </annotationAssertions>
869
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
870
  <annotationAssertions>
871
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
872
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
873
        <identification>32d73c43-4cf5-4abe-a452-816bd12dc063</identification>
874
      </annotationBean>
875
      <date>2012-10-01 12:56:53.971 UTC</date>
876
      <creators />
877
      <curationEventList />
878
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
879
  </annotationAssertions>
880
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
881
  <annotationAssertions>
882
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
883
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
884
        <identification>72783ac8-1be8-47c7-834e-90b211fd678d</identification>
885
      </annotationBean>
886
      <date>2012-10-02 09:41:25.373 UTC</date>
887
      <creators />
888
      <curationEventList />
889
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
890
  </annotationAssertions>
891
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
892
  <annotationAssertions>
893
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
894
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
895
        <identification>318bf84e-aeac-4cf5-8fa9-1f18fc42a308</identification>
896
      </annotationBean>
897
      <date>2012-10-01 12:36:43.490 UTC</date>
898
      <creators />
899
      <curationEventList />
900
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
901
  </annotationAssertions>
902
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
903
  <annotationAssertions>
904
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
905
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
906
        <identification>857c050a-49b5-4606-bcdb-220455d64291</identification>
907
      </annotationBean>
908
      <date>2012-10-01 12:33:52.730 UTC</date>
909
      <creators />
910
      <curationEventList />
911
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
912
  </annotationAssertions>
913
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
914
  <annotationAssertions>
915
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
916
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
917
        <identification>c16b4b3d-a30d-44fb-a6db-470e4b08c1b8</identification>
918
      </annotationBean>
919
      <date>2012-09-26 15:09:52.547 UTC</date>
920
      <creators />
921
      <curationEventList />
922
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
923
  </annotationAssertions>
924
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
925
  <annotationAssertions>
926
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
927
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
928
        <identification>f85240d9-2108-4057-8c42-78eef7ecf441</identification>
929
      </annotationBean>
930
      <date>2012-10-01 12:27:16.65 UTC</date>
931
      <creators />
932
      <curationEventList />
933
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
934
  </annotationAssertions>
935
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
936
  <annotationAssertions>
937
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
938
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
939
        <identification>6fa652e4-4cd9-462e-adee-7698bd80de14</identification>
940
      </annotationBean>
941
      <date>2012-10-01 12:33:37.287 UTC</date>
942
      <creators />
943
      <curationEventList />
944
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
945
  </annotationAssertions>
946
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
947
  <annotationAssertions>
948
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
949
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
950
        <identification>f24d982c-72c1-4654-b2b3-378a99dc1287</identification>
951
      </annotationBean>
952
      <date>2013-07-24 07:41:18.881 UTC</date>
953
      <creators />
954
      <curationEventList />
955
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
956
  </annotationAssertions>
957
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
958
  <annotationAssertions>
959
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
960
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
961
        <identification>8538abb1-b420-4d8e-be5f-64da636f59c1</identification>
962
      </annotationBean>
963
      <date>2012-10-02 09:37:23.56 UTC</date>
964
      <creators />
965
      <curationEventList />
966
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
967
  </annotationAssertions>
968
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
969
  <annotationAssertions>
970
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
971
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
972
        <identification>9ac568e3-fef8-4bb5-b035-f3680e35e284</identification>
973
      </annotationBean>
974
      <date>2012-10-02 12:28:00.33 UTC</date>
975
      <creators />
976
      <curationEventList />
977
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
978
  </annotationAssertions>
979
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
980
  <annotationAssertions>
981
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
982
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
983
        <identification>2efd4378-62f5-41ea-bf6f-dd7d50cd732e</identification>
984
      </annotationBean>
985
      <date>2012-10-01 13:08:54.255 UTC</date>
986
      <creators />
987
      <curationEventList />
988
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
989
  </annotationAssertions>
990
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
991
  <annotationAssertions>
992
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
993
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
994
        <identification>81553fe0-9aa5-4fc7-a775-19e96144f650</identification>
995
      </annotationBean>
996
      <date>2012-10-02 09:38:25.821 UTC</date>
997
      <creators />
998
      <curationEventList />
999
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
1000
  </annotationAssertions>
1001
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2><annotation_chain_2_2 encoding="xstream"><net.sf.taverna.t2.annotation.AnnotationChainImpl xmlns="">
1002
  <annotationAssertions>
1003
    <net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
1004
      <annotationBean class="net.sf.taverna.t2.annotation.annotationbeans.IdentificationAssertion">
1005
        <identification>29e62dd9-20e7-4851-abc5-17d0dbc3d805</identification>
1006
      </annotationBean>
1007
      <date>2012-10-02 12:36:17.294 UTC</date>
1008
      <creators />
1009
      <curationEventList />
1010
    </net.sf.taverna.t2.annotation.AnnotationAssertionImpl>
1011
  </annotationAssertions>
1012
</net.sf.taverna.t2.annotation.AnnotationChainImpl></annotation_chain_2_2></annotations></dataflow></workflow>