Yet another small achievement in my everlasting quest to make the ultimate programming language
Unlike my last tokenizer, which basically took a string and returned each chunk based on whichever character you chose as the separator, this one is different: it is designed for tokenizing source code.
For example, if I had this code:
makeObjectCube(1,100)
These are the sections it would be split into, each with its token type (an integer):
"makeObjectCube" - 1 (string)
"(" - 3 (operator)
"1" - 2 (number)
"," - 11 (operator)
"100" - 2 (number)
")" - 4 (operator)
All of these can then be used as data for whatever you wish to do with them.
Here's the DBPro code with a very simple demonstration.
`Setup
sync on : sync rate 60

`Setup Tokens
`Each token stores its text (str) and an integer type code (ttype):
`1 = string, 2 = number, 3 and up = operator (position in oplist() + 2).
type tToken
   str as string
   ttype as integer
endtype
dim token(0) as tToken

`Operator List
`Space-separated operators. The double quote is appended with chr$(34)
`outside the literal; written inside the quotes it would be stored as the
`literal text chr$(34) and could never match a single character.
dim oplist(0) as string
setOpList("( ) + - / * = ; , "+chr$(34))

`Read/Scan Code
`code$=readCode("code.txt","`")
code$="syncOn(60);"
scnCode(code$)

`Main Loop - print every token with its type code, one per row
do
   for t=1 to array count(token())
      text 10,20*t,token(t).str + " - " + str$(token(t).ttype)
   next t
   `End Loop
   sync
   cls
loop
`Function - Code
`Reads the text file txt$ and returns all of its lines joined into a single
`string (no separator is inserted between lines).
`A line is skipped when, after removeSpaces() is applied to it, it begins
`with rem$. Pass "" as rem$ to keep every line.
`Returns "" when the file does not exist.
function readCode(txt$,rem$)
   code$=""
   if file exist(txt$)=1
      open to read 1,txt$
      `single pass: read until the end of the file instead of counting lines first
      while file end(1)=0
         read string 1,cur$
         keep=1
         `an empty marker would match every line, so only filter when one is given
         if rem$<>""
            if left$(removeSpaces(cur$),len(rem$))=rem$ then keep=0
         endif
         if keep=1 then code$=code$+cur$
      endwhile
      close file 1
   endif
endfunction code$
`Scans code$ one character at a time and appends tokens to the global
`token() array. Each character is classified with opType(); consecutive
`characters of type 1 (string) or 2 (number) are grouped into one token.
`Operator characters (type above 2) always start a token of their own, so
`a run such as "))" yields two tokens instead of one merged token.
function scnCode(code$)
   prevType=0
   for c=1 to len(code$)
      cur$=mid$(code$,c)
      curType=opType(cur$)
      `prevType starts at 0 and opType never returns 0, so the first
      `character always opens a token
      if curType<>prevType or curType>2
         array insert at bottom token()
         ctok=array count(token())
         token(ctok).ttype=curType
      endif
      token(ctok).str=token(ctok).str+cur$
      prevType=curType
   next c
endfunction
`Classifies the single character text$ and returns its type code:
`1 by default, 2 when isNum() accepts it, or (index + 2) when it equals
`an entry of the global oplist() array. Every entry is checked, so the
`last matching entry decides the result.
function OpType(text$)
   kind=1
   if isNum(text$)<>0
      kind=2
   endif
   for idx=1 to array count(oplist())
      if oplist(idx)=text$
         kind=idx+2
      endif
   next idx
endfunction kind
`Operator List
`Splits list$ on single spaces and appends each piece to the global
`opList() array.
`Each piece is stored in the slot that was just appended, so calling this
`more than once extends the list instead of overwriting entries 1..count.
function setOpList(list$)
   count=getTokens(list$," ")
   for c=1 to count
      array insert at bottom opList()
      slot=array count(opList())
      opList(slot)=getToken(list$," ",c)
   next c
endfunction
`Functions - String
`Returns how many pieces st$ splits into when cut at the separator sp$.
`sp$ is compared against one character at a time, so it must be a single
`character. The string is wrapped in separators where it does not already
`start or end with one, and the result is the separator count minus one.
`Never returns a negative number.
function getTokens(st$,sp$)
   `start from the input itself so a string that already begins with the
   `separator is not discarded
   nst$=st$
   if left$(st$,1)<>sp$ then nst$=sp$+nst$
   if right$(st$,1)<>sp$ then nst$=nst$+sp$
   splits=0
   for counter=1 to len(nst$)
      if mid$(nst$,counter)=sp$ then inc splits
   next counter
   tokens=splits-1
   if tokens<0 then tokens=0
endfunction tokens
`Returns piece number num (1-based) of st$ split at the separator sp$.
`sp$ is compared against one character at a time, so it must be a single
`character. Returns "" when num is below 1 or beyond the last piece.
function getToken(st$,sp$,num)
   `wrap the input in separators; start from st$ itself so a string that
   `already begins with the separator is kept
   nst$=st$
   if left$(st$,1)<>sp$ then nst$=sp$+nst$
   if right$(st$,1)<>sp$ then nst$=nst$+sp$
   total=len(nst$)
   token$=""
   splits=0
   counter=0
   `advance to the separator that precedes piece number num
   while splits<num and counter<total
      inc counter
      if mid$(nst$,counter)=sp$ then inc splits
   endwhile
   `collect characters up to the next separator (bounded by the wrapped string)
   if num>=1 and splits=num
      inc counter
      while counter<=total
         cur$=mid$(nst$,counter)
         if cur$=sp$ then exit
         token$=token$+cur$
         inc counter
      endwhile
   endif
endfunction token$
`Returns a copy of st$ without any character that equals ch$.
`Characters are compared one at a time.
function removeChar(st$,ch$)
   kept$=""
   total=len(st$)
   idx=1
   while idx<=total
      c$=mid$(st$,idx)
      if c$<>ch$
         kept$=kept$+c$
      endif
      inc idx
   endwhile
endfunction kept$
`Returns the characters of st$ from position a through position b,
`both inclusive, built up one character at a time.
function strSect(st$,a,b)
   piece$=""
   for idx=a to b
      piece$=piece$+mid$(st$,idx)
   next idx
endfunction piece$
`Returns the 1-based position in st$ where sch$ occurs for the cnt-th
`time (overlapping occurrences are counted), or 0 when there are fewer
`than cnt occurrences, sch$ is empty, or cnt is below 1.
function linSearch(st$,sch$,cnt)
   pos=0
   found=0
   slen=len(sch$)
   `last start position at which sch$ can still fit inside st$
   last=len(st$)-slen+1
   if slen>0 and cnt>0 and last>=1
      for p=1 to last
         `compare exactly slen characters: positions p .. p+slen-1
         if strSect(st$,p,p+slen-1)=sch$
            inc found
            if found=cnt
               pos=p
               exit
            endif
         endif
      next p
   endif
endfunction pos
`Returns ent$ with its leading spaces removed. A string without leading
`spaces is returned unchanged; a string made only of spaces returns "".
function removeSpaces(ent$)
   total=len(ent$)
   spaces=0
   `count the leading spaces, stopping at the first other character
   for i=1 to total
      if mid$(ent$,i)<>" " then exit
      inc spaces
   next i
   ret$=""
   if spaces<total then ret$=right$(ent$,total-spaces)
endfunction ret$
`Returns 1 when the character code of text$ (as given by asc) lies in
`a..z or A..Z, otherwise 0.
function isLetter(text$)
   code=asc(text$)
   alpha=0
   if code>=asc("a")
      if code<=asc("z") then alpha=1
   endif
   if code>=asc("A")
      if code<=asc("Z") then alpha=1
   endif
endfunction alpha
`Returns 1 when the character code of text$ (as given by asc) lies in
`0..9, otherwise 0.
function isNum(text$)
   code=asc(text$)
   digit=0
   if code>=asc("0")
      if code<=asc("9") then digit=1
   endif
endfunction digit
`Functions - File
`Counts the lines of the text file file$ by reading strings from it until
`the end of the file is reached. Uses file number 1.
`Returns 0 when the file does not exist or contains nothing to read.
function getFileLines(file$)
   local lines as integer
   lines=0
   if file exist(file$)=1
      open to read 1,file$
      `test for the end before each read so an empty file counts as 0 lines
      while file end(1)=0
         read string 1,null$
         inc lines
      endwhile
      close file 1
   endif
endfunction lines