Using regular expressions on the RHS of a rule
// Rule ExtractAuthor
// LHS: matches a single Reference annotation whose "type" feature is either
//      "Literature" or "Patent" and binds it to the label "reference".
// RHS: runs a java.util.regex pattern over the document text covered by the
//      bound annotation and adds one "Author" annotation to outputAS for
//      every regex match. Each Author annotation receives its own copy of
//      the Reference annotation's features plus a "postprocessing.rule"
//      feature naming this rule.
Rule: ExtractAuthor
(
  {Reference.type == "Literature"}
 |{Reference.type == "Patent"}
):reference
-->
{
  AnnotationSet set = (AnnotationSet)bindings.get("reference");
  Annotation ann = set.iterator().next();
  try {
    // Document offset at which the matched text starts; regex match
    // positions are relative to this offset.
    long startOffset = set.firstNode().getOffset();
    String text = doc.getContent().getContent(
        set.firstNode().getOffset(), set.lastNode().getOffset()).toString();
    // Replace every whitespace character (new lines, tabs, ...) with a plain
    // space. This is a one-for-one substitution, so the text length is
    // unchanged and match positions still map onto document offsets.
    text = text.replaceAll("\\s", " ");
    String lastName =
        "\\b"                       // beginning of a word
        +"(?:\\p{Ll}{0,3} )?"       // particle ?
        +"\\p{Lu}[\\p{L}-]{1,13}"   // Name
        +"(?: \\p{Ll}{0,3})?";      // particle ?
    String initials = "(?: \\p{Lu}\\.){1,3}";
    // A last name followed by one of: ", X. Y." initials, "and <last name>",
    // or "et al.". The alternatives sit in a non-capturing group "(?:...)".
    java.util.regex.Matcher matcher = java.util.regex.Pattern.compile(
        lastName+"(?:(?:,"+initials+")|(?:,? and "+lastName+")|(?:,? et al\\.?))"
    ).matcher(text);
    while (matcher.find()) {
      // Build a fresh feature map per annotation so that the Author
      // annotations do not share one mutable map instance.
      FeatureMap fm = gate.Factory.newFeatureMap();
      fm.putAll(ann.getFeatures());
      fm.put("postprocessing.rule", "reference-extract-author.ExtractAuthor");
      outputAS.add(startOffset + matcher.start(),
                   startOffset + matcher.end(),
                   "Author", fm);
    }
  } catch(InvalidOffsetException e) {
    // Rethrown unchecked with the original exception kept as the cause.
    throw new GateRuntimeException(e);
  }
}
page_revision: 0, last_edited: 1227546747|%e %b %Y, %H:%M %Z (%O ago)





