1 # defines the alphabet size, set this to 0x11000 for unicode
4 def bisect_set_or(character_set0, character_set1):
5 # calculate union of the child sets
6 # we do this by calculating a series of breakpoints, at each breakpoint
7 # evaluating the "or" (max) of the even/odd truth values of each child,
8 # then making the output truth value even/odd by outputting if necessary
13 if i < len(character_set0):
15 if j < len(character_set1):
16 k = min(k, character_set1[j])
17 elif j < len(character_set1):
21 if i < len(character_set0) and character_set0[i] == k:
23 if j < len(character_set1) and character_set1[j] == k:
25 if (len(result) & 1) != max(i & 1, j & 1):
27 assert (i & 1) == 0 and (j & 1) == 0
30 def bisect_set_and(character_set0, character_set1):
31 # calculate intersection of the child sets
32 # we do this by calculating a series of breakpoints, at each breakpoint
33 # evaluating the "and" (min) of the even/odd truth values of each child,
34 # then making the output truth value even/odd by outputting if necessary
39 if i < len(character_set0):
41 if j < len(character_set1):
42 k = min(k, character_set1[j])
43 elif j < len(character_set1):
47 if i < len(character_set0) and character_set0[i] == k:
49 if j < len(character_set1) and character_set1[j] == k:
51 if (len(result) & 1) != min(i & 1, j & 1):
53 assert (i & 1) == 0 and (j & 1) == 0
56 def bisect_set_not(character_set):
57 # calculate complement of the child set
58 # if child set begins with [0], remove it, otherwise add [0] prefix
59 # if child set ends with [n_characters], remove it, otherwise add [n_characters] suffix
60 # the suffix part is not totally necessary, but makes sure length is even
61 # (the evenness is so that single character sets can always be [c, c + 1])
62 result = list(character_set)
67 if result[-1:] == [n_characters]:
70 result.append(n_characters)