Using regular expressions on the RHS of a rule
// Finds author names inside a matched Reference annotation (type "Literature"
// or "Patent") by running a Java regular expression over the annotation's text
// on the RHS, and adds one "Author" annotation per regex match.
Rule: ExtractAuthor
(
  {Reference.type == "Literature"}
  |{Reference.type == "Patent"}
):reference
-->
{
  AnnotationSet set = (AnnotationSet)bindings.get("reference");
  Annotation ann = set.iterator().next();
  Long startOffset = set.firstNode().getOffset();
  Long endOffset = set.lastNode().getOffset();
  try {
    String text = doc.getContent().getContent(startOffset, endOffset).toString();
    // Replace every whitespace character (including line breaks) with a plain
    // space. The replacement is one character for one character, so regex
    // match positions still line up with document offsets.
    text = text.replaceAll("\\s", " ");
    String lastName =
       "\\b" // beginning of a word
      +"(?:\\p{Ll}{0,3} )?" // optional leading particle (up to 3 lower-case letters)
      +"\\p{Lu}[\\p{L}-]{1,13}" // name: upper-case letter, then letters or hyphens
      +"(?: \\p{Ll}{0,3})?"; // optional trailing particle
    String initials = "(?: \\p{Lu}\\.){1,3}"; // one to three initials such as " J. K."
    // A last name followed by one of: ", <initials>", " and <last name>"
    // (optionally preceded by a comma), or " et al." (optional comma and dot).
    java.util.regex.Matcher matcher = java.util.regex.Pattern.compile(
      lastName+"(?:(?:,"+initials+")|(?:,? and "+lastName+")|(?:,? et al\\.?))"
      ).matcher(text);
    while (matcher.find()) {
      // Give every Author annotation its own feature map so that later edits
      // to one annotation's features do not leak into the others.
      FeatureMap fm = Factory.newFeatureMap();
      fm.putAll(ann.getFeatures());
      fm.put("postprocessing.rule", "reference-extract-author.ExtractAuthor");
      // Match positions are relative to the extracted text; shift them by the
      // start offset of the reference to obtain document offsets.
      outputAS.add(startOffset + matcher.start(),
                   startOffset + matcher.end(),
                   "Author", fm);
    }
  } catch(InvalidOffsetException e) {
    throw new GateRuntimeException(e);
  }
}
Unless otherwise stated, the content of this page is licensed under Creative Commons Attribution-ShareAlike 3.0 License