Fix several bugs to get Python scanner/parser basically working, processes ../tests...
[pilex.git] / bisect_set.py
1 # defines the alphabet size, set this to 0x11000 for unicode
2 n_characters = 0x100
3
4 def bisect_set_or(character_set0, character_set1):
5   # calculate union of the child sets
6   # we do this by calculating a series of breakpoints, at each breakpoint
7   # evaluating the "or" (max) of the even/odd truth values of each child,
8   # then making the output truth value even/odd by outputting if necessary
9   result = []
10   i = 0
11   j = 0
12   while True:
13     if i < len(character_set0):
14       k = character_set0[i]
15       if j < len(character_set1):
16         k = min(k, character_set1[j])
17     elif j < len(character_set1):
18       k = character_set1[j]
19     else:
20       break
21     if i < len(character_set0) and character_set0[i] == k:
22       i += 1
23     if j < len(character_set1) and character_set1[j] == k:
24       j += 1
25     if (len(result) & 1) != max(i & 1, j & 1):
26       result.append(k)
27   assert (i & 1) == 0 and (j & 1) == 0
28   return result
29
30 def bisect_set_and(character_set0, character_set1):
31   # calculate intersection of the child sets
32   # we do this by calculating a series of breakpoints, at each breakpoint
33   # evaluating the "and" (min) of the even/odd truth values of each child,
34   # then making the output truth value even/odd by outputting if necessary
35   result = []
36   i = 0
37   j = 0
38   while True:
39     if i < len(character_set0):
40       k = character_set0[i]
41       if j < len(character_set1):
42         k = min(k, character_set1[j])
43     elif j < len(character_set1):
44       k = character_set1[j]
45     else:
46       break
47     if i < len(character_set0) and character_set0[i] == k:
48       i += 1
49     if j < len(character_set1) and character_set1[j] == k:
50       j += 1
51     if (len(result) & 1) != min(i & 1, j & 1):
52       result.append(k)
53   assert (i & 1) == 0 and (j & 1) == 0
54   return result
55
56 def bisect_set_not(character_set):
57   # calculate complement of the child set
58   # if child set begins with [0], remove it, otherwise add [0] prefix
59   # if child set ends with [n_characters], remove it, otherwise add [n_characters] suffix
60   # the suffix part is not totally necessary, but makes sure length is even
61   # (the evenness is so that single character sets can always be [c, c + 1])
62   result = list(character_set)
63   if result[:1] == [0]:
64     del result[:1]
65   else:
66     result[:0] = [0]
67   if result[-1:] == [n_characters]:
68     del result[-1:]
69   else:
70     result.append(n_characters)
71   return result