! phonecode.ax - specification axioms for Prechelt's "phone code" problem ! -- see utility functions and predicates in the utilities directory !---+----x----+----x----+----x----+----x----+----x----+----x----+----x----+----8 !/ Given the "encode_mapping" below, we want to encode telephone numbers by words so that they will be easier to remember. from: Lutz Prechelt, An empirical comparison of C, C++, Java, Perl, Python, Rexx, and Tcl for a search/string-processing program TR 2000-5, Fakultat fur Informatik, Universitat Karlsruhe, Germany, 3-10-00. The Problem: Your task is writing a program that finds, for a given phone number, all possible encodings by words, and prints them. A phone number is an arbitrary(!) string of dashes (-), slashes (/) and digits. The dashes and slashes will not be encoded. The words are taken from a dictionary which is given as an alphabetically sorted ASCII file (one word per line). Only exactly each encoding that is possible from this dictionary and that matches the phone number exactly shall be printed. Thus, possibly nothing is printed at all. The words in the dictionary contain letters (capital or small, but the difference is ignored in the sorting), dashes (-) and double quotes ("). For the encoding only the letters are used, but the words must be printed in exactly the form given in the dictionary. Leading non-letters do not occur in the dictionary. Encodings of phone numbers can consist of a single word or of multiple words separated by spaces. The encodings are built word by word from left to right. If and only if at a particular point no word at all from the dictionary can be inserted, a single digit from the phone number can be copied to the encoding instead. Two subsequent digits are never allowed, though. To put it differently: In a partial encoding that currently covers k digits, digit k + 1 is encoded by itself if and only if, first, digit k was not encoded by a digit and, second, there is no word in the dictionary that can be used in the encoding starting at digit k + 1. Your program must work on a series of phone numbers; for each encoding that it finds, it must print the phone number followed by a colon, a single(!) space, and the encoding on one line; trailing spaces are not allowed. All remaining ambiguities in this specification will be resolved by the following example. (Still remaining ambiguities are intended degrees of freedom.) ... Program valid expression format: (Program ) -- 2 input text files and 1 output file or (Program ) where == (... ...) -- sequence of words == sequence of upper- and lowercase letters plus - and " == (... ...) -- sequence of phone numbers == sequence of digits plus - and / == (... ...) -- seq of output lines, 1 per encoding == ': ' !\ ! encode_mapping - mapping of letters to digits for encoding (encode_mapping ("E" "JNQ" "RWX" "DSY" "FT" "AM" "CIV" "BKU" "LOP" "GHZ")). ! 0 1 2 3 4 5 6 7 8 9 ! letter>digit - mapping of upper- or lowercase letter to digit (letter>digit %ltr %dig)< ! get uppercase mapping (encode_mapping ($enc1 ($l1 %ltr $l2) $enc2)), ! find letter in mapping (length ($enc1) %dnum), ! digit number is position of letter group (index digit %dig %dnum). ! get digit character (letter>digit (`char (%b7 %b6 `1 %b4 %b3 %b2 %b1 %b0) %dig)< !lowercase map (letter>digit (`char (%b7 %b6 `0 %b4 %b3 %b2 %b1 %b0) %dig). ! -- define lowercase mapping from uppercase ! Program - phonecode program valid expressions (see above format) (Program %dict %phone#s %outencs)< ! encode phone numbers ((_ (1* phonecode1)) %dict %phone#s %outencs). ! concat each #'s encodings ! -- we concatenate the each phone#'s encoding output lines ! phonecode1 - get all encodings for one phone number ! (phonecode1 ) (phonecode1 %dict %phone# %outlines)< ((filter (in_set? digit)) %phone# %Dphone#), ! get phone # as digits-only (partial_encodings %dict %Dphone# () %encodings), ! get encoding solutions ((1* output_encoding) %phone# %encodings %outlines). !put in output format ! partial_encodings - incrementally turn encoding prefixes into complete encodes ! (partial_encodings ) ! -- These encode prefixes are either null or end in a dictionary word. ! (Thus, a single digit may follow if "last-resort".) ! -- Both prefix and solution encodes are sequences of dictionary words, ! optionally interspersed with "last resort" digit characters. (partial_encodings %dict %phone# (()) ()). ! empty seq is valid encode prefix ! extend a prefix to get longer prefixes: (partial_encodings %dict %phone# ($prefixes' $prefixes) %solns)< (partial_encodings %dict %phone# (%prefix $prefixes) %solns), (encode_suffix %dict %phone# %prefix %suffix), ! rest of num after encode (matching_words %dict %suffix (%nxw $nxws)), ! >0 words match suffix ((1* append1) %prefix (%nxw $nxws) ($prefixes')). ! append new wds to pre (partial_encodings %dict %phone# ($prefixes' $prefixes) %solns)< (partial_encodings %dict %phone# (($prefix) $prefixes) %solns), (encode_suffix %dict %phone# ($prefix) (%suf $suf)), !rest of num after enc (matching_words %dict (%suf $suf) ()), ! 0 words match suffix (matching_words %dict ($suf) %nxws), ! >=0 words match rest of suf ((1* append1) ($prefix %suf) %nxws ($prefixes')). ! append dig & words ! -- there are no new prefixes if ($suf) has no matching words ! or turn a prefix into a solution, if possible: (partial_encodings %dict %phone# ($prefixes) (%prefix $solns))< ! soln found! (partial_encodings %dict %phone# (%prefix $prefixes) ($solns)), (encode_suffix %dict %phone# %prefix ()). ! prefix covers entire number (partial_encodings %dict %phone# ($prefixes) (($prefix %suf) $solns))< (partial_encodings %dict %phone# (($prefix) $prefixes) ($solns)), (encode_suffix %dict %phone# ($prefix) (%suf)), ! just one digit left (matching_words %dict (%suf) ()), ! no words match 1-digit suffix ! -- thus we can append remaining digit to prefix to make a solution ! encode_suffix - get suffix of digit string after encoding prefix is applied ! (encode_suffix ) ! -- our encoding prefix need not have the single digit restrictions (encode_suffix ($) () ($)). ! if prefix null then entire sequence is suffix (encode_suffix ($word_digits $digits) (%word $prefix) %suffix)< (encode_suffix ($digits) ($prefix) %suffix), (word>digits %word ($word_digits)). (encode_suffix (%digit $digits) (%digit $prefix) %suffix)< (encode_suffix ($digits) ($prefix) %suffix), (digit %digit). ! matching_words - get dictionary words that match prefix of a digit string ! (matching_words ) (matching_words %dict %digits %matches)< ((filter*1 prefix_match?) %dict %digits %matches). ! prefix_match? - test if word's encoding is prefix of digit string ! (prefix_match? ) (prefix_match? %word %digits %t/f)< (word>digits %word %wddigs), (prefix? %wddigs %digits %t/f). ! word>digits - map a "word" to the digit string it represents ! -- "word" can be upper- or lowercase letters plus - and " (which are ignored) (word>digits () ()). (word>digits (%ltr $word) (%dig $digits))< (letter>digit %ltr %dig), (word>digits ($word) ($digits)). (word>digits (%ignored $word) %digits)< (in %ignored "-"""), ! ignore - and " (word>digits ($word) %digits). ! output_encoding - output an encoding to a character string (output_encoding %phone# %encoding %line)< (insert_between ' ' %encoding %encoding'), !insert spaces between enc elems (write (%phone# ": " %encoding') %line). ! write phone# & encoding to line