# (Stray "Report abuse" link text from the hosting page; not part of the grammar.)

# parse_kif.tt
#
#   http://treetop.rubyforge.org/
#   http://treetop.rubyforge.org/semantic_interpretation.html
#   http://treetop.rubyforge.org/syntactic_recognition.html
#   http://treetop.rubyforge.org/using_in_ruby.html
#   http://treetop.rubyforge.org/pitfalls_and_advanced_techniques.html
#
#   http://en.wikipedia.org/wiki/Parsing_expression_grammar
#
#   http://logic.stanford.edu/kif/dpans.html#4
#
# Many thanks to hagabaka and db-keen on #treetop !

grammar KifGrammar

  # The exp* methods and explanations are courtesy of db-keen on #treetop,
  # but any errors are almost undoubtedly my own.
  #
  # An exp_list is a space-separated list of exp_wraps.
  # An exp_wrap is either a parenthesized exp_list or a token.
  # The two rules are mutually recursive...
  #
  rule exp_list
    space? exps:(exp_wrap ( space? exp_wrap )*)? space? {

      # Flatten the labelled 'exps' node into a plain array of exp_wrap
      # nodes.
      #
      # super() returns the syntax node labelled 'exps' in the sequence
      # above (the label and this method share a name).  Its elements are
      # nil when the optional group matched nothing; otherwise they are
      #
      #   [ exp_wrap, ( space? exp_wrap )* ]
      #
      # Each repetition in the second slot is itself a two-element
      # sequence, so index 1 of every repetition is its exp_wrap.  The
      # result is the first exp_wrap followed by all of those.
      #
      def exps
        parts = super.elements

        if parts
          first, rest = parts[0], parts[1]
          [ first ] + rest.elements.map { |pair| pair.elements[1] }
        else
          []                    # '()' -> []
        end
      end

      # Convert every contained expression to its Ruby form.  When the
      # global $rk_strip is set, Symbols whose text begins with ';'
      # (the form the note rule produces) are left out of the result.
      #
      def to_ruby
        values = exps.map { |node| node.to_ruby }

        values.reject do |val|
          $rk_strip && val.class == Symbol && val.to_s[0..0] == ';'
        end
      end
    }
  end

  rule exp_wrap
    '(' exp_list ')' {

      # A parenthesized node is a thin wrapper around its inner exp_list.
      # Any call the wrapper does not define itself (to_ruby, exps, ...)
      # is forwarded, block included, so that API users can ignore the
      # fact that exp_wrap exists.
      #
      def method_missing(m, *args, &block)
        exp_list.send(m, *args, &block)
      end

      # Mirror method_missing.  The optional second argument matches the
      # signature of Object#respond_to?, so callers that pass it no longer
      # raise ArgumentError; super keeps the answer true for the methods
      # this node defines itself.
      #
      def respond_to?(m, include_private = false)
        super || exp_list.respond_to?(m, include_private)
      end
    }
    / token
  end

  # Noise (ie, '#|...|#' comments)

  rule noise
    '#|' n_b '|#' {
      # The whole comment, delimiters included, as a Symbol.
      def to_ruby
        text_value.to_sym
      end
    }
  end

  rule n_b            # noise body
    (
      noise                 # a complete nested '#|...|#' comment
      /
      (!'#|' !'|#' .)       # or one character that begins neither delimiter
    )*
  end

  # Notes (ie, ";...\n" comments)

  rule note
    # A note is a ';' followed by everything up to, but not including,
    # the end of the line.
    #
    # (!"\n" .)* consumes characters one at a time, stopping in front of
    # a newline.  The trailing positive lookahead then checks for a
    # newline or end of input without consuming it, so that back-to-back
    # comment lines parse cleanly.

    ';' contents:(!"\n" .)* &("\n" / !.) {
      # The note as a Symbol: ';', the comment text, and a newline.
      def to_ruby
        [ ';', contents.text_value, "\n" ].join.to_sym
      end
    }
  end

  # Numbers (exponential, floating point, and integer numbers)

  rule b_f            # base - float
    # Optionally signed integer part, a '.', then one or more fraction
    # digits.  The fraction is plain digits rather than b_i: b_i accepts
    # a leading '-', which let text such as "1.-5" through as a float
    # that to_f then silently read as 1.0.
    b_i '.' [0-9]+
  end

  rule b_i            # base - integer
    # An optional minus sign followed by one or more decimal digits.
    '-'? dgt+
  end

  rule n_e            # number - exponential
    # Mantissa (float or integer), an 'e', and an integer exponent.  The
    # !nd lookahead rejects the match when a non-digit word character
    # follows directly.
    (b_f / b_i) 'e' b_i !nd {
      def to_ruby
        text_value.to_f
      end
    }
  end

  rule n_f            # number - float
    # A base float not directly followed by a non-digit word character.
    b_f !nd {
      def to_ruby
        text_value.to_f
      end
    }
  end

  rule n_i            # number - integer
    # A base integer not directly followed by a non-digit word character.
    b_i !nd {
      def to_ruby
        text_value.to_i
      end
    }
  end

  # Any numeric literal.  PEG choice is ordered: the exponential form is
  # tried first, then the float form, then the integer form, and the
  # first alternative that matches wins.
  rule number
    n_e / n_f / n_i
  end 

  # Space (any of blank, formfeed, tab, newline, or return)

  rule space
    # One or more of: blank, formfeed, tab, newline, carriage return.
    [ \f\t\n\r]+
  end

  # Strings

  rule string
    # A double-quoted string.  A backslash escapes whatever character
    # follows it, so \" does not end the string and \\ is a literal
    # backslash.
    #
    # The escape alternative has to come first.  PEG choice is ordered,
    # and (!'"' .) listed first would consume the backslash by itself,
    # leaving the escaped quote to terminate the string early.

    '"' body:('\\' . / !'"' .)* '"' {
      # The raw text between the quotes; escape sequences are not decoded.
      def to_ruby; body.text_value; end
    }
  end

  # Token (noise, note, string, number, word)

  # A single atom.  The alternatives are tried in order and the first
  # match wins.  number has to be tried before word: a word character
  # may be a digit (wc includes dgt), so word would also accept a run of
  # digits.
  rule token
    noise / note / string / number / word
  end

  # Word

  rule word
    # One word character, then any mix of escape sequences and further
    # word characters.
    wc (es / wc)* {
      # The matched text, escapes left as written, as a Symbol.
      def to_ruby
        text_value.to_sym
      end
    }
  end

  # A single ASCII decimal digit.
  rule dgt
    [0-9]
  end

  rule es             # escape sequence
    '\\' . {
      # Render the escaped character as a backslash plus its octal code,
      # zero-padded to at least three digits.
      #
      # text_value[1] is an Integer on Ruby 1.8 but a one-character
      # String on 1.9 and later, where passing it straight to '%03o'
      # raises ArgumentError.  Calling ord gives the character code on
      # both (Integer#ord exists from 1.8.7 on).
      def to_ruby; '\\' + '%03o' % text_value[1].ord; end
    }
  end

  # A single punctuation character.  Together with ltr this makes up nd,
  # so these characters are accepted inside words.
  rule etc
    [:!%&/<=>@_~] / '*' / '+' / '-' / '?' / '$' / '.'
  end

  # A single ASCII letter, either case.
  rule ltr            # letter
    [a-zA-Z]
  end

  # A word character that is not a digit: punctuation (etc) or a letter.
  # The number rules use it as a negative lookahead (!nd) so that a
  # number is not matched when such a character follows it directly.
  rule nd             # non-digit
    etc / ltr
  end

  rule wc             # word character
    # An inline block binds to the single alternative it follows, not to
    # the whole choice.  The block is therefore repeated on each
    # alternative so that digit nodes get to_ruby as well.
    dgt               { def to_ruby; text_value;             end }
    /
    nd                { def to_ruby; text_value;             end }
  end

end