New Tokenizer - Made for codes - GameCreators Forum

Author

Message

Zotoaster

19

Years of Service

User Offline

Joined: 20th Dec 2004

Location: Scotland

Posted: 23rd Dec 2006 18:20

Link

Yet another small achievement in my everlasting quest to make the ultimate programming language.

Unlike my last tokenizer, which basically took a string and returned each chunk based on the character you chose to split the line on, this one is different: it is made for tokenizing source code.

i.e. if I had this code:

+ Code Snippet

makeObjectCube(1,100)

These are the sections it would be split up into, and the token type (integer)

+ Code Snippet

"makeObjectCube" - 1 (string)
"(" - 3 (operator)
"1" - 2 (number)
"," - 11 (operator)
"100" - 2 (number)
")" - 4 (operator)

which then can all be used as data for whatever you wish to do with it.

Here's the DBPro code with a very simple demonstration.

`Setup
sync on : sync rate 60

`Setup Tokens - each token stores its text and an integer type code
type tToken
   str as string
   ttype as integer
endtype
dim token(0) as tToken

`Operator List
`Type codes follow list position: the first entry is type 3, the next 4, and so on.
`The double quote is appended with chr$(34). Written inside the literal it would be
`stored as the eight characters chr$(34) and could never match a single character.
dim oplist(0) as string
setOpList("( ) + - / * = ; , "+chr$(34))

`Read/Scan Code
`code$=readCode("code.txt","`")
code$="syncOn(60);"
scnCode(code$)

`Main Loop
do

   `Print every token and its type code, one per row
   for t=1 to array count(token())
      text 10,20*t,token(t).str + " - " + str$(token(t).ttype)
   next t

`End Loop
sync
cls
loop

`Function - Code
function readCode(txt$,rem$)
   `Reads the file txt$ and returns its lines joined into one string (no separators).
   `A line is left out when the first len(rem$) characters of removeSpaces(line) equal rem$.
   total=getFileLines(txt$)
   remLen=len(rem$)
   open to read 1,txt$
      n=1
      while n<=total
         read string 1,row$
         head$=left$(removeSpaces(row$),remLen)
         if head$<>rem$ then code$=code$+row$
         inc n
      endwhile
   close file 1
endfunction code$
function scnCode(code$)
   `Splits code$ into tokens appended to the global token() array.
   `Each character is classified by opType(). Neighbouring characters of type 1 or 2
   `are merged into one token; every operator character (type 3 and up) starts its
   `own token, so "((" gives two "(" tokens instead of a single "((" token.
   prevType=0
   for c=1 to len(code$)
      cur$=mid$(code$,c)
      curType=opType(cur$)
      if curType<>prevType or curType>2
         array insert at bottom token() : ctok=array count(token())
         token(ctok).ttype=curType
      endif
      token(ctok).str=token(ctok).str+cur$
      prevType=curType
   next c
endfunction
function OpType(text$)
   `Classifies text$: 2 when isNum() accepts it, 1 otherwise, and
   `index+2 when it equals entry 'index' of the global oplist() array.
   `The whole list is scanned, so the last matching entry decides the result.
   if isNum(text$)
      result=2
   else
      result=1
   endif
   entries=array count(oplist())
   for idx=1 to entries
      if oplist(idx)=text$ then result=idx+2
   next idx
endfunction result

`Operator List
function setOpList(list$)
   `Splits list$ on single spaces and appends each piece to the global opList() array.
   `The slot index is read from array count() after the insert, so a second call
   `extends the list instead of overwriting the entries stored by the first call.
   count=getTokens(list$," ")
   for c=1 to count
      array insert at bottom opList()
      slot=array count(opList())
      opList(slot)=getToken(list$," ",c)
   next c
endfunction

`Functions - String
function getTokens(st$,sp$)
   `Returns how many pieces st$ splits into on the single-character separator sp$.
   `Works on a copy padded with one separator at each end. The copy starts from st$
   `itself, so text that already begins with sp$ is no longer thrown away.
   `An empty st$ yields 0 pieces.
   nst$=st$
   if left$(nst$,1)<>sp$ then nst$=sp$+nst$
   if right$(nst$,1)<>sp$ then nst$=nst$+sp$
   `n pieces are bounded by n+1 separators
   for p=1 to len(nst$)
      if mid$(nst$,p)=sp$ then inc splits
   next p
   tokens=splits-1
endfunction tokens
function getToken(st$,sp$,num)
   `Returns piece number num (1-based) of st$ split on the single-character separator sp$.
   `When st$ has fewer than num pieces the result is an empty string; this assumes mid$()
   `yields an empty string past the end of the text.
   `The padded copy starts from st$ itself so a leading separator does not wipe the text,
   `and both loops are bounded by the padded length, which is the string they walk.
   nst$=st$
   if left$(nst$,1)<>sp$ then nst$=sp$+nst$
   if right$(nst$,1)<>sp$ then nst$=nst$+sp$
   total=len(nst$)
   `Advance to the separator that opens piece num
   repeat
      inc counter
      if mid$(nst$,counter)=sp$ then inc splits
   until splits=num or counter>total
   `Collect characters up to the next separator
   repeat
      inc counter
      cur$=mid$(nst$,counter)
      if cur$<>sp$ then token$=token$+cur$
   until cur$=sp$ or counter>total
endfunction token$
function removeChar(st$,ch$)
   `Returns st$ with every character equal to ch$ left out.
   total=len(st$)
   for idx=1 to total
      if mid$(st$,idx)<>ch$
         mes$=mes$+mid$(st$,idx)
      endif
   next idx
endfunction mes$
function strSect(st$,a,b)
   `Returns the characters of st$ from position a through position b, both inclusive.
   for idx=a to b
      sect$=sect$+mid$(st$,idx)
   next idx
endfunction sect$
function linSearch(st$,sch$,cnt)
   `Returns the position in st$ where sch$ occurs for the cnt-th time, or 0 when
   `there is no such occurrence. Every start position is tested, so occurrences may overlap.
   `The compared slice is exactly len(sch$) characters wide (p to p+width-1), and the
   `scan stops at the requested occurrence so later positions cannot overwrite the result.
   width=len(sch$)
   if width>0 and cnt>0
      p=1
      last=len(st$)-width+1
      while p<=last and pos=0
         if strSect(st$,p,p+width-1)=sch$
            inc count
            if count=cnt then pos=p
         endif
         inc p
      endwhile
   endif
endfunction pos
function removeSpaces(ent$)
   `Returns ent$ without its leading spaces; spaces elsewhere are kept.
   `Text that has no leading space is returned unchanged.
   total=len(ent$)
   `Count the run of spaces at the start of the text
   spaces=0
   while spaces<total and mid$(ent$,spaces+1)=" "
      inc spaces
   endwhile
   `Copy everything after that run
   for i=spaces+1 to total
      ret$=ret$+mid$(ent$,i)
   next i
endfunction ret$
function isLetter(text$)
   `Returns 1 when asc(text$) lies in the range A-Z or a-z, otherwise 0.
   ch=asc(text$)
   alpha=0
   if ch>=asc("A") and ch<=asc("Z") then alpha=1
   if ch>=asc("a") and ch<=asc("z") then alpha=1
endfunction alpha
function isNum(text$)
   `Returns 1 when asc(text$) lies in the range 0-9, otherwise 0.
   ch=asc(text$)
   num=0
   if ch>=asc("0")
      if ch<=asc("9") then num=1
   endif
endfunction num

`Functions - File
function getFileLines(file$)
   `Counts the strings that can be read from file$ using file channel 1.
   `The read happens before the end-of-file check, so the result is never less than 1.
   local lines as integer
   open to read 1,file$
   repeat
      read string 1,skip$
      lines=lines+1
   until file end(1)=1
   close file 1
endfunction lines

+ Code Snippet

`Setup
sync on : sync rate 60

`Setup Tokens - each token stores its text and an integer type code
type tToken
   str as string
   ttype as integer
endtype
dim token(0) as tToken

`Operator List
`Type codes follow list position: the first entry is type 3, the next 4, and so on.
`The double quote is appended with chr$(34). Written inside the literal it would be
`stored as the eight characters chr$(34) and could never match a single character.
dim oplist(0) as string
setOpList("( ) + - / * = ; , "+chr$(34))


`Read/Scan Code
`code$=readCode("code.txt","`")
code$="syncOn(60);"
scnCode(code$)


`Main Loop
do

   `Print every token and its type code, one per row
   for t=1 to array count(token())
      text 10,20*t,token(t).str + " - " + str$(token(t).ttype)
   next t


`End Loop
sync
cls
loop



`Function - Code
function readCode(txt$,rem$)
   `Reads the file txt$ and returns its lines joined into one string (no separators).
   `A line is left out when the first len(rem$) characters of removeSpaces(line) equal rem$.
   total=getFileLines(txt$)
   remLen=len(rem$)
   open to read 1,txt$
      n=1
      while n<=total
         read string 1,row$
         head$=left$(removeSpaces(row$),remLen)
         if head$<>rem$ then code$=code$+row$
         inc n
      endwhile
   close file 1
endfunction code$
function scnCode(code$)
   `Splits code$ into tokens appended to the global token() array.
   `Each character is classified by opType(). Neighbouring characters of type 1 or 2
   `are merged into one token; every operator character (type 3 and up) starts its
   `own token, so "((" gives two "(" tokens instead of a single "((" token.
   prevType=0
   for c=1 to len(code$)
      cur$=mid$(code$,c)
      curType=opType(cur$)
      if curType<>prevType or curType>2
         array insert at bottom token() : ctok=array count(token())
         token(ctok).ttype=curType
      endif
      token(ctok).str=token(ctok).str+cur$
      prevType=curType
   next c
endfunction
function OpType(text$)
   `Classifies text$: 2 when isNum() accepts it, 1 otherwise, and
   `index+2 when it equals entry 'index' of the global oplist() array.
   `The whole list is scanned, so the last matching entry decides the result.
   if isNum(text$)
      result=2
   else
      result=1
   endif
   entries=array count(oplist())
   for idx=1 to entries
      if oplist(idx)=text$ then result=idx+2
   next idx
endfunction result


`Operator List
function setOpList(list$)
   `Splits list$ on single spaces and appends each piece to the global opList() array.
   `The slot index is read from array count() after the insert, so a second call
   `extends the list instead of overwriting the entries stored by the first call.
   count=getTokens(list$," ")
   for c=1 to count
      array insert at bottom opList()
      slot=array count(opList())
      opList(slot)=getToken(list$," ",c)
   next c
endfunction

`Functions - String
function getTokens(st$,sp$)
   `Returns how many pieces st$ splits into on the single-character separator sp$.
   `Works on a copy padded with one separator at each end. The copy starts from st$
   `itself, so text that already begins with sp$ is no longer thrown away.
   `An empty st$ yields 0 pieces.
   nst$=st$
   if left$(nst$,1)<>sp$ then nst$=sp$+nst$
   if right$(nst$,1)<>sp$ then nst$=nst$+sp$
   `n pieces are bounded by n+1 separators
   for p=1 to len(nst$)
      if mid$(nst$,p)=sp$ then inc splits
   next p
   tokens=splits-1
endfunction tokens
function getToken(st$,sp$,num)
   `Returns piece number num (1-based) of st$ split on the single-character separator sp$.
   `When st$ has fewer than num pieces the result is an empty string; this assumes mid$()
   `yields an empty string past the end of the text.
   `The padded copy starts from st$ itself so a leading separator does not wipe the text,
   `and both loops are bounded by the padded length, which is the string they walk.
   nst$=st$
   if left$(nst$,1)<>sp$ then nst$=sp$+nst$
   if right$(nst$,1)<>sp$ then nst$=nst$+sp$
   total=len(nst$)
   `Advance to the separator that opens piece num
   repeat
      inc counter
      if mid$(nst$,counter)=sp$ then inc splits
   until splits=num or counter>total
   `Collect characters up to the next separator
   repeat
      inc counter
      cur$=mid$(nst$,counter)
      if cur$<>sp$ then token$=token$+cur$
   until cur$=sp$ or counter>total
endfunction token$
function removeChar(st$,ch$)
   `Returns st$ with every character equal to ch$ left out.
   total=len(st$)
   for idx=1 to total
      if mid$(st$,idx)<>ch$
         mes$=mes$+mid$(st$,idx)
      endif
   next idx
endfunction mes$
function strSect(st$,a,b)
   `Returns the characters of st$ from position a through position b, both inclusive.
   for idx=a to b
      sect$=sect$+mid$(st$,idx)
   next idx
endfunction sect$
function linSearch(st$,sch$,cnt)
   `Returns the position in st$ where sch$ occurs for the cnt-th time, or 0 when
   `there is no such occurrence. Every start position is tested, so occurrences may overlap.
   `The compared slice is exactly len(sch$) characters wide (p to p+width-1), and the
   `scan stops at the requested occurrence so later positions cannot overwrite the result.
   width=len(sch$)
   if width>0 and cnt>0
      p=1
      last=len(st$)-width+1
      while p<=last and pos=0
         if strSect(st$,p,p+width-1)=sch$
            inc count
            if count=cnt then pos=p
         endif
         inc p
      endwhile
   endif
endfunction pos
function removeSpaces(ent$)
   `Returns ent$ without its leading spaces; spaces elsewhere are kept.
   `Text that has no leading space is returned unchanged.
   total=len(ent$)
   `Count the run of spaces at the start of the text
   spaces=0
   while spaces<total and mid$(ent$,spaces+1)=" "
      inc spaces
   endwhile
   `Copy everything after that run
   for i=spaces+1 to total
      ret$=ret$+mid$(ent$,i)
   next i
endfunction ret$
function isLetter(text$)
   `Returns 1 when asc(text$) lies in the range A-Z or a-z, otherwise 0.
   ch=asc(text$)
   alpha=0
   if ch>=asc("A") and ch<=asc("Z") then alpha=1
   if ch>=asc("a") and ch<=asc("z") then alpha=1
endfunction alpha
function isNum(text$)
   `Returns 1 when asc(text$) lies in the range 0-9, otherwise 0.
   ch=asc(text$)
   num=0
   if ch>=asc("0")
      if ch<=asc("9") then num=1
   endif
endfunction num

`Functions - File
function getFileLines(file$)
   `Counts the strings that can be read from file$ using file channel 1.
   `The read happens before the end-of-file check, so the result is never less than 1.
   local lines as integer
   open to read 1,file$
   repeat
      read string 1,skip$
      lines=lines+1
   until file end(1)=1
   close file 1
endfunction lines