--The program acquires a 10bit TDC timestamp and computes the Fourier code encoding using 4 256bin histograms.
--	The encoded 16bit output is in RAM(0..63).
--	The cosine LUT is in byte0 of each RAM location from RAM(64) to RAM(127) (i.e. 64 8bit values covering one quarter of the 256-step cosine period, 0 to pi/2).
--	The multiplication coefficient for this core is hard coded.
--	This code is designed to run on the core in the upper right corner of the 2x2 cluster, as it will accumulate all the final values.


-- Register aliases and constants used throughout the program.
#define processed_timestamp 0   				--register R0 holds the processed timestamp (the trig argument, kept modulo 256)
#define timestamp 1								--register R1 holds the timestamp read from the TDC
#define cosine 2								--register R2 holds the cosine of the processed timestamp
#define sine 3									--register R3 holds the sine of the processed timestamp
#define R4 4				    				--register R4 is used by the LUT function; also holds the histogram address and, later, the multiplication coefficient / neighbour data
#define R5 5									--register R5 is used for miscellaneous tasks (loop counters, carry-clearing via OR R5, R5, R5)
#define CtrlOut 0b10000							--address for external trigger signal

-- One-time setup (addresses 0-2). The acquisition loop re-enters at address 3, so these are executed only once.
0: SETOR 0, $0xFFFF 							--set mask for ORTree0 to allow any pixel to be used as START
1: SETTM $0b0000010011101111 					--TIME0 configured to be: TDC with internal reference, always on, will measure distance between pulse from ORTree0 and signal from CPU
2: STIMCNT										--reset input latch


-- Per-timestamp acquisition (addresses 3-11). Address 3 is the target of the JUMPNCTRL at address 61.
-- Leaves: timestamp = TDC result, processed_timestamp = timestamp modulo 256,
--         cosine/sine = LUT results for processed_timestamp, R5 = 16.
3: RSTCNT										--reset TDC
4: TELL CtrlOut, 0								--strobe trigger
5: CALL 160										--expose: wait in the delay function at 160 (it uses R5 as its counter)

6: GETC timestamp, 0							--read TDC result
7: SPLIT timestamp, 1, processed_timestamp 		--only keep the lower 8 bits of the number i.e. modulo 256

8: LOAD R5, $0, 1								--reset counter to 16: the accumulate loop at 50 decrements R5 twice per pass,
9: LOAD R5, $16, 0								--  which gives 8 passes (number of values in encoding)

10: CALL 100 									--find cosine and sine values of processed_timestamp

11: OR R5, R5, R5								--clear carry flag 

---------------------------------------------------------------------------------------------------------
-- Histogram selection (addresses 12-31).
-- Picks the RAM base offset of the histogram to update from the two top bits of the 10bit
-- timestamp (bits 10 and 9, counting from 1) and leaves it in R4 before jumping to the
-- accumulate loop at 50. Base offsets are 0, 16, 32 and 48 (4 histograms in RAM(0..63)).
-- The tests read the unprocessed `timestamp` register: processed_timestamp only holds the
-- lower 8 bits after the SPLIT at address 7, so masks 512 and 256 could never match there.
12: LOAD R4, $512 ,0							--load R4 as a mask to check bit 10 of the timestamp
13: LOAD R4, $0, 1
14: AND R4, timestamp, R4						--check bit 10
15: JUMPZ 24									--bit 10 clear: timestamp is in [0, 511]
16: LOAD R4, $256 ,0							--load R4 as a mask to check bit 9 of the timestamp
17: LOAD R4, $0, 1
18: AND R4, timestamp, R4						--check bit 9
19: JUMPZ 22									--bit 9 clear: timestamp is in [512, 767]
-- timestamp is in [768, 1023]
20: LOAD R4, $48, 0 							--will increment in the fourth histogram, offset 48
21: JUMP 50
-- timestamp is in [512, 767]
22: LOAD R4, $32, 0 							--will increment in the third histogram, offset 32
23: JUMP 50

24: LOAD R4, $256 ,0							--load R4 as a mask to check bit 9 of the timestamp
25: LOAD R4, $0, 1
26: AND R4, timestamp, R4						--check bit 9
27: JUMPZ 30									--bit 9 clear: timestamp is in [0, 255]
-- timestamp is in [256, 511]
28: LOAD R4, $16, 0 							--will increment in the second histogram, offset 16
29: JUMP 50
-- timestamp is in [0, 255]
30: LOAD R4, $0, 0 								--will increment in the first histogram, offset 0
31: JUMP 50
---------------------------------------------------------------------------------------------------------


-- Accumulate loop (addresses 50-61).
-- On entry: R4 = RAM address inside the selected histogram, R5 = 16, cosine/sine set by the LUT call at address 10.
-- Each pass adds cosine and sine into RAM at (R4), adds `timestamp` to the trig argument (kept modulo 256),
-- increments R4 once and decrements R5 twice; the loop ends when R5 reaches zero.
-- NOTE(review): R4 is not advanced between addresses 50 and 53, so cosine and sine are accumulated
--   into the same RAM location -- confirm this is intended.
-- NOTE(review): the loop jumps back to 50 without calling the LUT function at 100 again, so cosine and sine
--   keep the values computed at address 10 although processed_timestamp is updated at 55-56 -- confirm this is intended.
50: ADD (R4), cosine, (R4)						--accumulate cosine
51: OR R5, R5, R5								--clear carry flag 
52: SUBC R5, $1									--count one component done (first of the two counter decrements per pass)
53: ADD (R4), sine, (R4)						--accumulate sine
54: OR R5, R5, R5								--clear carry flag 
55: ADD timestamp, processed_timestamp, processed_timestamp 	--increment trig argument
56: SPLIT processed_timestamp, 1, processed_timestamp  			--only keep the lower 8 bits of the number i.e. modulo 256
57: OR R5, R5, R5								--clear carry flag 
58: ADDC R4, $1									--move to next component (next RAM location)
59: SUBC R5, $1									--second counter decrement of this pass
60: JUMPNZ 50									--if not done with all the values, repeat 
61: JUMPNCTRL 0, 3 								--move on to next timestamp (address 3) unless requested to stop

---------------------------------------------------------------------------------------------------------
-- Scaling pass (addresses 62-69): multiplies every encoded value in RAM(63..0) in place by the
-- hard-coded coefficient held in R4. R5 is the RAM address / down-counter.
62: LOAD R4, $0, 1								--multiplication coefficient for this core
63: LOAD R4, $24, 0								--multiplication coefficient for this core (24)

--multiply with coeff
64: LOAD R5, $63, 0								--initialize counter to 63
65: MUL (R5), R4, (R5)							--multiply with coeff
66: OR R5, R5, R5								--clear carry flag
67: SUBC R5, $1									--decrement counter
68: JUMPNZ 65									--if not done, repeat 
69: MUL (R5), R4, (R5)							--perform final multiplication for counter = 0 (the loop exits before processing address 0)

--get values from SOUTH neighbour and accumulate into RAM(63..0); R5 is the RAM address / down-counter
70: LOAD R5, $63, 0								--initialize counter to 63
71: SAVEN 2										--save data from SOUTH neighbour
72: GETN 1, 0b010000 ,0							--move data from SOUTH neighbour into R4
73: ADD (R5), R4, (R5)							--accumulate histogram bin
74: OR R5, R5, R5								--clear flag
75: SUBC R5, $1									--decrement counter
76: JUMPNZ 71									--repeat until done with all bins
--last step, for counter = 0 (the loop exits before processing address 0)
77: SAVEN 2										--save data from SOUTH neighbour
78: GETN 1, 0b010000 ,0							--move data from SOUTH neighbour into R4
79: ADD (R5), R4, (R5)							--accumulate histogram bin 

--get values from WEST neighbour and accumulate into RAM(63..0); R5 is the RAM address / down-counter
80: LOAD R5, $63, 0								--initialize counter to 63
81: SAVEN 4										--save data from WEST neighbour
82: GETN 2, 0b010000 ,0							--move data from WEST neighbour into R4
83: ADD (R5), R4, (R5)							--accumulate histogram bin
84: OR R5, R5, R5								--clear flag
85: SUBC R5, $1									--decrement counter
86: JUMPNZ 81									--repeat until done with all bins
--last step, for counter = 0 (the loop exits before processing address 0)
87: SAVEN 4										--save data from WEST neighbour
88: GETN 2, 0b010000 ,0							--move data from WEST neighbour into R4
89: ADD (R5), R4, (R5)							--accumulate histogram bin

-----------------------------------------------------------------------------------------------------------
90: JUMP 90										--stay here (end of program: spin forever)


----------------------------------------------------------------------------------------------------------------------------------------------
-- LUT function (addresses 100-149); will compute the cosine and sine values of the processed_timestamp
--   Input:    processed_timestamp, expected in [0, 255]
--   Output:   cosine, sine (fetched from byte0 of the LUT in RAM(64..127), negated where the quadrant requires it)
--   Clobbers: R4 and the flags; cosine and sine are also used as scratch for the LUT addresses. R5 is only OR-ed with itself.
--   The argument range is split in four blocks of 64 and each block is folded onto the single 64-entry LUT.
--   Sine is negated for arguments in [0, 127] and left as fetched for [128, 255];
--   NOTE(review): this presumably returns -sin(x), i.e. the exp(-jx) convention -- confirm.
100: LOAD R4, $0, 1								--load 64 into R4
101: LOAD R4, $64, 0
102: CMP processed_timestamp, R4
103: JUMPC 120									--the timestamp is in [0, 63]

104: LOAD R4, $128, 0							--load 128 into R4
105: CMP processed_timestamp, R4
106: JUMPC 130									--the timestamp is in [64, 127]

107: LOAD R4, $192, 0      						--load 192 into R4
108: CMP processed_timestamp, R4
109: JUMPC 140									--the timestamp is in [128, 191]

--the timestamp is in [192, 255]: cosine as fetched, sine as fetched
110: LOAD R4, $319, 0 							--load LUT address offset plus the value for cosine (255+64)
111: OR R5, R5, R5								--clear carry flag
112: SUB R4, processed_timestamp, cosine		--compute the index for the cosine value: 255-x+64
113: LOAD R4, $128, 0 							--load LUT address offset plus the value for sine (192-64)
114: OR R5, R5, R5								--clear carry flag
115: SUB processed_timestamp, R4, sine   		--compute the index for the sine value: x-192+64 
116: FETCH (cosine), cosine, 0					--get values from LUT
117: FETCH (sine), sine, 0
118: RET 										--return

--the timestamp is in [0, 63]: cosine as fetched, sine negated
120: LOAD R4, $64, 0 							--load 64 into R4 i.e. the LUT address offset in the RAM
121: OR R5, R5, R5								--clear carry flag
122: ADD processed_timestamp, R4, cosine		--compute the index for the cosine value: x+64
123: LOAD R4, $127, 0 							--load 127 into R4 i.e. the LUT address offset in the RAM plus the value for sine
124: SUB R4, processed_timestamp, sine   		--compute the index for the sine value: 63-x+64 
125: FETCH (cosine), cosine, 0					--get values from LUT
126: FETCH (sine), sine, 0
127: NEG sine, sine								--negate sine
128: RET 										--return

--the timestamp is in [64, 127]: cosine negated, sine negated
130: LOAD R4, $191, 0 							--load LUT address offset plus the value for cosine (127+64)
131: OR R5, R5, R5								--clear carry flag
132: SUB R4, processed_timestamp, cosine		--compute the index for the cosine value: 127-x+64
133: FETCH (cosine), cosine, 0					--get values from LUT
134: FETCH (processed_timestamp), sine, 0		--the index for sine is x-64+64=x, no operation needed
135: NEG cosine, cosine							--negate cosine
136: NEG sine, sine								--negate sine
137: RET 										--return

--the timestamp is in [128, 191]: cosine negated, sine as fetched
140: LOAD R4, $64, 0 							--load LUT address offset plus the value for cosine (128-64)
141: OR R5, R5, R5								--clear carry flag
142: SUB processed_timestamp, R4, cosine		--compute the index for the cosine value: x-128+64
143: LOAD R4, $255, 0 							--load LUT address offset plus the value for sine (191+64)
144: OR R5, R5, R5								--clear carry flag
145: SUB R4, processed_timestamp, sine   		--compute the index for the sine value: 191-x+64 
146: FETCH (cosine), cosine, 0					--get values from LUT
147: FETCH (sine), sine, 0
148: NEG cosine, cosine							--negate cosine
149: RET 										--return


----------------------------------------------------------------------------------------------------------------------------------------------
-- delay function (addresses 160-164); will stay here for 440ns to limit TDC range to 10bit
--   Clobbers: R5 (used as the down-counter) and the flags; the caller at address 5 reloads R5 afterwards.
--   NOTE(review): unlike the other loops in this file, the carry flag is not cleared before SUBC here --
--   confirm the number of iterations (and hence the delay) does not depend on the incoming carry.
160: LOAD R5, $0, 1								--set R5 to 24 
161: LOAD R5, $24, 0
162: SUBC R5, $1								--decrement R5
163: JUMPNZ 162									--repeat until zero
164: RET 										--return