#!/usr/bin/gawk -f
# Forth cross compiler for the OneDer
# Details on the wiki





# end of string
instring!="" && substr($1,1,length(instring))==instring {
# note that "load" is not initialized yet!
    FS=OldFS
    nrf=split($0,strx,OldFS)
    for (i=2;i<=nrf;i++) {
	s=sprintf("\\%03o",strtonum(strx[i]))
	strings[nextstring]=strings[nextstring] s
    }
    $0 = ": " stringlbl[nextstring] " macro { LDRI " opt_slabel nextstring ", FDSTK_PUSH } ;"
    nextstring++;
    instring=""
}

# middle of string
instring!="" {
    if (strlinect++) strings[nextstring] = strings[nextstring] "\\r\\n"
    strings[nextstring]=strings[nextstring] $0
    next
}




/^[ \t]*\\!org/ { opt_org=$2; next; }
/^[ \t]*\\!quick/ { opt_quick=1; if ($2!="") opt_quick=$2; next; }
/^[ \t]*\\!main/ { opt_mainword=$2; next; }
/^[ \t]*\\!mainlabel/ { opt_mainlbl=$2; next; }
/^[ \t]*\\!wordlabel/ { opt_wlabel=$2; next; }
/^[ \t]*\\!jumplabel/ { opt_jlabel=$2; next; }
/^[ \t]*\\!initsp/ { opt_isp=$2; next; }   
/^[ \t]*\\!initrsp/ { opt_irsp=$2; next; } 
/^[ \t]*\\!wordcomment/  { opt_wordcom=1; if ($2!="") opt_wordcom=$2; next; }
/^[ \t]*\\!ignorecase/ { opt_case=1; if ($2!="") opt_case=$2; next; }
/^[ \t]*\\!nomainloop/ { opt_mloop=0; if ($2!="") opt_mloop=$2; next; }
/^[ \t]*\\!herebase/ { here=opt_herebase=$2; next; }
/^[ \t]*\\!topreg/ { regcallnum=opt_topreg=$2; next; }
/^[ \t]*\\!include/ {
        sub(/^[ \t]*\\!include[ \t]+/,"")
	ARGV[ARGC++]=$0
	next;
    }
/^[ \t]*\\!/    { 
    print "Warning " FILENAME "(" FNR "): Unknown option: " $0 >"/dev/stderr" 
    next; 
}

# post option init
function postinit() { 
    opt_mainword=getalias(opt_mainword) 
    todo[opt_mainword]=opt_mainword;
    sym[opt_mainword]=0;   # preload this
    if (opt_quick) jmp="\tJMPQ "; else jmp="\tJMP ";
    if (opt_quick) call="\tCALLQ "; else call="\tCALL ";
    if (opt_quick) loadc="\tLDRIQ "; else loadc="\tLDRI ";
}

function prolog() {
# todo make an option to set org -- or maybe a compile time word?
    print "\tORG " opt_org
    print ";Prolog goes here"
    if (opt_mloop) print opt_mainlbl ":"
    print loadc opt_irsp  ",FSTK_SP"
    print loadc opt_isp ",FDSTK_SP"
    print "\tMOV FIMMV,R(17)  ; R17 is top of stack"
    print "; R16 is temp storage, R18 is _here"
    print call "_POST_INIT4"
    print call opt_wlabel "0"
    if (!opt_mloop) print opt_mainlbl ":"
    print jmp opt_mainlbl
    # be sure to update nextword
    print ";End prolog"
}

function epilog() {
    print ";Epilog goes here"
    print "_POST_INIT4:" loadc opt_herebase ",R(18)"
# need to dump vinit out here
    for (x in vinit) {
	# general outline, load MP1, load immediate, store, increment
	# for now just cheat
	if (done["!"]=="") wcompile("!"); 
	print "; init " x " = " vinit[x]
	doword(vinit[x],0,empty);
	doword(x,0,empty);
	doword("!",0,empty);
    }
    for (x in reginit) {
	print loadc opt_wlabel sym[x] ",R(" reginit[x] ")"
	    }
    print "\tRETURN"
    for (x in stringlbl) {
	if (macused[stringlbl[x]]) {
	    if (strpack[x]) op="STRINGPACK"; else op="STRING"
	    print opt_slabel x ":"
	    if (strcount[x]) print "\tDATA " opt_slabel x "_end-" opt_slabel x "-1"
	    print "\t" op " \"" strings[x] "\""
	    print opt_slabel x "_end:"
#	    print "\tDATA 0"
	}
    }
    print "\tEND"
}

function compile() {
    w1=getalias(opt_mainword)
    if (word[w1]=="" && cons[w1]=="" && macro[w1]=="") err(w1 " not found")
    prolog();
    processed=0;
    do {
	processed=0;
	for (w in todo) {
	    w1=getalias(w)
	    if (done[w1]!="") continue;
	    if (macro[w1]=="" || w1!=w ) 
		wcompile(w1);
	    delete todo[w]
	    processed=1;
	}
    } while (processed);
    epilog();
}



function wcompile(w) {
        cword=w;
	s=word[w]
	delete todo[w]
	printf (opt_wlabel "%d:   ; %s definition\n",sym[w],w);
	if (opt_wordcom && s!="") {
	    print "; : " w " " substr(s,3) " ;"
	}
	done[w]=sym[w];
	if (cons[w]!="") {
# odd case, word redefines constant
	    literal(cons[w])
	    return;
	}
	if (s=="") {
	    s=macro[w];  # maybe a forced macro only word
	    if (s!="" && xtra[s]!="") todo[xtra[s]]=xtra[s]
	}
	if (s=="") {
# hmm not there
	    print ";",w,"missing"
	    print w,"not found" >"/dev/stderr"
	    return;
	}
	nwords=split(s,words);
	if (nwords==1) {
	    print "\tRETURN;"
	    final_term=1;
	    return;
	}
	for (wp=2; wp<nwords; wp++) {
	    doword(words[wp],0,words);
	}
# do final word
	if (wp==nwords) {
	    final_term=0;
	    if (doword(words[nwords],1,words)!=0) {
		if (!final_term) print "\tRETURN";
	    }
	}
	else {  # catch case where we bumped past the end
	    print "\tRETURN";
	}

}

function err(s)
{
    errflag=1
    print FILENAME,"(" FNR "): " s  >"/dev/stderr"
    exit 1;
}

function isnum(wrd) {
 return wrd ~ /^[+-]?(([0-9]+)|(0[xX][0-9a-fA-F]+))$/
}

function literal(wrd) {
    if (isnum(wrd)) {
	v=strtonum(wrd);
	v1=xon["n" v ]
	if (v1!="") {
	    printf("\tMOV 0x%1X00,FDSTK_PUSH   ; push %s\n",v1,wrd);
	    return 1;
	}
	if (v<=0xFFFFFFF && v>=0) {
	    # short constant
	    print "\tLDIQ ", v, ";", wrd
	    print "\tMOV FIMMV,FDSTK_PUSH";
	    return 1;
	}
	if (v<0) {
# TODO: worry about 64 to 32-bit issues here so for now, punt
	}
	print "\tLDI",wrd
	print "\tMOV FIMMV,FDSTK_PUSH   ; push immediate"
	return 1;

    }
    return 0;


}

function settodo(l) 
{
    todo[l]=l
}

function immediate(wrd,final,words) {
    if (wrd=="'") {
	l=words[++wp]
	if (sym[l]=="") {
	    sym[l]=nextword++;
	    settodo(l)
	}
	print loadc opt_wlabel  sym[l] ",FDSTK_PUSH"
	return 1;
    }
    if (wrd=="label") {
	l=words[++wp];
	print l ":"
	return 1;
    }
    if (wrd=="goto") {
	l=words[++wp];
	print jmp l
	final_term=1;  # don't do a return next if I'm last
	return 1;
    }
    if (wrd=="if") {
	printf "\tMOV FDSTK_POP,FACC\n";
	if (opt_quick) {
	    printf "\tJMPZQ " opt_jlabel "%d\n",nextjump;
	} else {
    	    printf "\tJMPZ " opt_jlabel "%d\n",nextjump;
	}
	jumpstack[jmpsp++]=nextjump++;
	return 1;
    }
    if (wrd=="then" || wrd=="endif") {
	if (jmpsp==0) err("Mismatched then/endif");
	printf(opt_jlabel "%d:\n",jumpstack[--jmpsp]);
	return 1;
    }
    if (wrd=="else") {
	if (jmpsp==0) err("Mismatched else");
	printf(jmp opt_jlabel "%d\n",nextjump);
	printf(opt_jlabel "%d:\n",jumpstack[--jmpsp]);
	jumpstack[jmpsp++]=nextjump++;
	return 1;
    }
# note: expects quotes around char
    if (wrd=="char") {
	c=words[++wp]
	if (substr(c,1,1)!="'") err("Misformed character constant in " cword);
# note that ' ' is a space use '\t' for tab
	if (c=="'") {
	    c="' '";
	    wp++;
	}
# note: we know this is <256 so always use LDRIQ
	print "\tLDRIQ " c ", FDSTK_PUSH"
	return 1;
    }
    if (wrd=="recurse") {
	print (final?jmp:call) opt_wlabel sym[cword]
	final_term=final;
	return 1;
    }
    if (wrd=="begin") {
	printf opt_jlabel "%d:\n",nextjump;
	jumpstack[jmpsp++]=nextjump++;
	return 1;
    }
    if (wrd=="until") {
	j=jumpstack[--jmpsp];
	printf("\tMOV FDSTK_POP,FACC\n");
	if (opt_quick) {
	    printf("\tJMPZQ " opt_jlabel  j "\n");
	} else {
	    printf("\tJMPZ " opt_jlabel  j "\n");
	}

	return 1;
    }
    if (wrd=="again") {
	j=jumpstack[--jmpsp];
	printf jmp opt_jlabel  "%d\n",j
	final_term=1;  # don't do a return if I was last
	return 1;
    }
    if (wrd=="while") {
	print "\tMOV FDSTK_POP,FACC";
	if (opt_quick) {
	    printf "\tJMPZQ " opt_jlabel "%d\n",nextjump;
	} else {
	    printf "\tJMPZ " opt_jlabel "%d\n",nextjump;
	}
	t=jumpstack[--jmpsp]
	jumpstack[jmpsp++]=nextjump++;
	jumpstack[jmpsp++]=t;
	return 1;
    }
    if (wrd=="repeat") {
	j=jumpstack[--jmpsp];
# can't set final_term here since while will jump below me
	printf jmp opt_jlabel "%d\n",j
	j=jumpstack[--jmpsp];
	printf(opt_jlabel "%d:\n",j);
	return 1;
    }
    return 0; # no action
}

function getalias(w) {
# resolve aliases
    do {
	if (xtra[w]!="" && sym[w]=="") {
	    todo[xtra[w]]=xtra[w];
	    sym[xtra[w]]=nextword++;
	}
	wt=alias[w]
	if (wt!="") w=wt;
    } while (wt!="");
    return w;
}

function doword(wrd, final, words, nwords, mwords,mm) {  # nwords, mwords, mm local
    if (emitflag) {
	if (wrd=="}") {
	    printf("\n");
	    emitflag=0;
	    return 1;
	}
	printf(" %s ",wrd);
	return 0;
    }
    wrd=opt_case?tolower(wrd):wrd
    wrd=getalias(wrd)
    if (cons[wrd]!="") {
	if (literal(cons[wrd])!=0) return 1;
    }
    if (wrd=="{") {
	emitflag=1;
	printf("\t");  # use label if you want a label
	return 0;  
    }
    if (immediate(wrd,final,words)!=0) return 1;
    if (literal(wrd)!=0) return 1;
    if (macro[wrd]!="") {
        if (xtra[wrd]!="") {
	    todo[xtra[wrd]]=xtra[wrd]
	    if (sym[xtra[wrd]]=="") sym[xtra[wrd]]=nextword++; 
	}
	ms=macro[wrd]
	macused[wrd]=wrd
# expand macro
	nwords=split(macro[wrd],mwords);
	if (nwords>1) {
	    wpstack[wpstackp++]=wp
	    for (wp=2;wp<nwords;wp++)
		rv=doword(mwords[wp],0,mwords);  
	    if (wp==nwords) rv=doword(mwords[nwords],final,mwords);
	    wp=wpstack[--wpstackp]
	    return rv;
	}
	return 1;
    }
    l=sym[wrd];
    if (l=="") {
	sym[wrd]=nextword;
	l=nextword++;
	settodo(wrd)
    }
    if (final) {
	# check for fall through optimization
	if (done[wrd]=="") {
	    print "; fall through to " wrd
	    wcompile(wrd);
	    
	}
	else 
	    print jmp,opt_wlabel l,";",wrd;  # hidden return optimization
    }
    else print call,opt_wlabel l,";",wrd
    return 0;
}


function init() {
# global wp stack (for macro expansion)
    wpstackp=0
    regcallnum=50
    if (ARGC==1) ARGV[ARGC++]="-";
# The following was to force 1forth.lib in ahead of your code
# but now we just do that in the driver script
# anticipating multiple targets
#    for (i=ARGC;i>=1;i--) ARGV[i+1]=ARGV[i];
#    ARGV[1]="1forth.lib"  # to do -- expand path
#    ARGC++;
# options
    opt_org = 0x200;
    opt_quick = 0;
    opt_mainlbl = "_main"
    opt_mainword = "_pre_main"
    opt_wlabel = "_L"
    opt_jlabel = "_J"
    opt_slabel = "_S"
    opt_irsp="0x7ff";
    opt_isp="0x1ff"; 
    opt_wordcom=0;
    opt_case=0;
    opt_mloop=1;
    opt_herebase=0x10;
    opt_topreg=50;

    nextword=1;
    nextjump=1;
    nextstring=1;
    jmpsp=0;
    cword="";
    emitflag=0;
    comflag=0;
    final_term=0;  # if 1, no need to generate last return
    here=0x10;
# set up fake constant unit for compiler
# Note on a 32 bit system the big numbers might be negative?
    xon["n0"]=0;
    xon["n1"]=1;
    xon["n2"]=2;
    xon["n2147483648"]=3;
    xon["n4"]=4;
    xon["n255"]=5;
    xon["n65280"]=6;
    xon["n16711680"]=7;
    xon["n4278190080"]=8;
    xon["n16"]=9;
    xon["n10"]=10;
    xon["n15"]=11;
    xon["n240"]=12;
    xon["n128"]=13;
    xon["n2863311530"]=14;
    xon["n4294967295"]=15;
    xon["n-1"]=15;   # might need to do negatives for the other big #s
}


function addword(wrd) {
    if (comflag!=0) {
	if (wrd==")") {
	    comflag--;
	    return;
	}
    }
    if (wrd=="(") {
	comflag++;
	return;
    }
    if (comflag) return;
    if (wrd=="macro" || wrd=="inline") {
	is_mac=1;
	return;
    }
    word[cword]=word[cword] " " wrd
    if (done[wrd]!="") {
	settodo(wrd)
    }
    if (emitflag1!=1) cword_ct++;
# count asm as 1 word
    if (wrd=="{") { is_asm=1; emitflag1=1; }
    if (wrd=="}" && emitflag1==1) emitflag1=0;

}


function checkalias() {
    if (cword_ct==1) {
	reword=substr(word[cword],3)
# add any immediate words that might appear alone here
# in theory ought to check all of them, but the rest
# would be an error if cword_ct==1 anyway
	if (reword=="recurse") return;
	if (isnum(reword)) {
	    # instead of calling a push, just
	    # mark as a constant and emit the literal code
	    cons[cword]=reword
            delete word[cword]
	    delete alias[cword]
	    return;
	}  else delete cons[cword]
	alias[cword]=reword
	delete word[cword]
       delete cons[cord]
    } else  {
	delete alias[cword] 
#       delete cons[cword]  
    }
}

function procmacro()
{
    macro[cword]=word[cword];
    delete word[cword];
}

BEGIN {
    init();
}

/^[ \t]*$/ { next; }
/^[ \t]*\\/ { next; }

# limitations on input syntax
# : must be first thing on first line
# word must be on same line as colon
# ; must be last thing on last line (others ignored)
# string literals will have other limits
# macros (: and pushfu, popfu, etc. that read the next word)
# may not have comments in the middle and must not split lines
# nn constant name  <- appears on one line creates a word
isnum($1) && tolower($2)=="constant" {
    $0=": " $3 " " $1 " ;"
}

/^[ \t]*[vV][aA][rR][iI][aA][bB][lL][eE]/ {
    vsym[$2]=here   # probably don't need this
    cons[$2]=here
    incr=1;
    if ($3!="") incr=$3;
    here+=incr;
    next;
}

isnum($1) && tolower($2)=="value" {
   vinit[$3]=$1
   cons[$3]=here;
   vsym[$3]=here++;
   next;
}


# register based call
$1==":" && $3=="regcall" {
    if (regcallnum==0x1F)     print "Warning: regcall allocating register 0x1F. " >"/dev/stderr" 
    n=regcallnum--;
    delete word[$2]
    delete alias[$2] # verify
#    macro[$2]=": { MOV R(" n "), FPC_CALL }"
    macro[$2]=": { CALLR " n " }"
    $3=""
    xtra[$2]="__r_" $2
    $2= "__r_" $2

    reginit[$2]=n
# keep on 
}


# string literal must start with " (including space) and then go for whole line
$1=="\"" {
    l=$2
    stringlbl[nextstring]=$2
    instring=$3
    if ($3=="") instring="\""
    strpack[nextstring]=($4~"^[pP][aA][cC][kK]$")
    strcount[nextstring]=0
    for (i=4;i<=NF;i++) {
	if ($i=="#") strcount[nextstring]=1;
	if ($i~"^[pP][aA][cC][kK]"||$i=="#") continue;
	s=sprintf("\\%03o",strtonum($i))
	  strings[nextstring]=strings[nextstring] s
    }
    strlinect=0
    OldFS=FS
    FS="\n"
    next
}




$1==":" && comflag==0 {
    if (cword!="") err("Nested colon definition or nested semicolon");
    if (opt_case) cword=tolower($2); else cword=$2;
    cword_ct=0;
    word[cword]=":";  # place holder
    is_mac=0;
    is_asm=0;
    for (i=3;i<=NF;i++) {
	if ($i==";" && !comflag) {
	    if (is_asm==1 && cword_ct==1) { is_mac=1; cword_ct++; }
	    if (is_mac==1 && cword_ct>1) {
		procmacro();
	    } else delete macro[cword];
	    checkalias();
	    cword="";
	    is_mac=0;
	    break;
	}
	addword($i);
    }
    next;
    
}


  {
      if (cword=="" && comflag==0 && $1!="(") err("Only colon defintions allowed");  # this is an error!
      for (i=1;i<=NF;i++) {
	  if ($i==";" && !comflag) {
	    if (is_asm==1 && cword_ct==1) { is_mac=1; cword_ct++; }
	    if (is_mac==1 && cword_ct>1) {
		procmacro();
	    } else delete macro[cword];
	      checkalias();
	      cword="";
	      is_mac=0;
	      break;
	  }
	  addword($i);
      }
  }

END {
    if (!errflag) {
	if (cword!="") err("Unexpected end of file inside " cword);
	postinit();
	compile();
    }
}


