/* This file contains code to do profiling.

   Copyright (C) 2007-2014 Free Software Foundation, Inc.
   Contributor: Joern Rennecke
                on behalf of Synopsys Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#include "../asm.h"
#include "auxreg.h"
/* This file contains code to do profiling.  */

/* Syntax: GNU as for ARC, run through the C preprocessor.  Both ';' and
   '#' start a comment that runs to the end of the line, so every
   statement and every preprocessor directive must sit on its own line.
   FUNC / ENDFUNC and the auxiliary register names (CONTROL0, COUNT0,
   LIMIT0, STATUS32, INT_VECTOR_BASE, DC_FLDL, D_CACHE_BUILD) are not
   defined in this file; presumably they come from the two headers
   included above.  */

/* Value written to LIMIT0 by __profil.  Weak absolute symbol, so another
   definition at link time can replace the default of 200.  */
        .weak   __profile_timer_cycles
        .global __profile_timer_cycles
        .set    __profile_timer_cycles, 200

/* One 32-bit word holding (buf - (offset >> 1)) as computed by __profil.
   __profil_irq adds (ilink1 >> 2) * 2 to it to find the 16-bit counter
   for the interrupted address.  */
        .section .bss
        .global __profil_offset
        .align 4
        .type   __profil_offset, @object
        .size   __profil_offset, 4
__profil_offset:
        .zero   4

        .text
        .global __dcache_linesz
        .global __profil

/* __profil
   In:   r0 = buf, r1 = bufsiz, r2 = offset, r3 = scale
   Out:  r0 = 0 on both return paths

   buf == 0: writes r0 (zero) to CONTROL0 and returns.
   Otherwise: checks that scale is 0x8000 and sets the halt flag if it is
   not, records buf - (offset >> 1) in __profil_offset, walks the buffer
   writing addresses to DC_FLDL unless bit 0 of the __dcache_linesz result
   is set, copies the two words of the "j __profil_irq" stub at local
   label 1 to INT_VECTOR_BASE + 24, programs LIMIT0 / CONTROL0 and sets
   bit 1 of STATUS32.

   Registers written besides r0: r1, r2, r3, r8, r12, lp_count, flags.
   NOTE(review): r1, r3 and r8 are used after "bl __dcache_linesz" without
   being reloaded, so this code relies on that routine preserving them;
   confirm against its definition.  */
FUNC(__profil)
.Lstop_profiling:
        sr      r0,[CONTROL0]           ; r0 is 0 on this path
        j_s     [blink]
        .balign 4                       ; .Lprofil is 4-byte aligned; the pcl offsets below rely on it
__profil:
.Lprofil:
        breq_s  r0,0,.Lstop_profiling
        ; r0: buf r1: bufsiz r2: offset r3: scale
        bxor.f  r3,r3,15                ; scale must be 0x8000, i.e. 1/2; generate 0.
        push_s  blink
        lsr_s   r2,r2,1
        mov_s   r8,r0                   ; keep buf across the call below
        flag.ne 1                       ; halt if wrong scale
        sub_s   r0,r0,r2                ; r0 = buf - (offset >> 1)
        st      r0,[__profil_offset]
        bl      __dcache_linesz
        pop_s   blink
        bbit1.d r0,0,nocache            ; bit 0 of the result set: skip the loop (presumably no data cache)
        mov_s   r0,r8                   ; delay slot: r0 = buf again
#ifdef __ARC700__
        ; One DC_FLDL write per 32 bytes of buffer, rounding bufsiz up.
        add_s   r1,r1,31
        lsr.f   lp_count,r1,5
        lpne    2f                      ; loop is skipped when the count is zero
        sr      r0,[DC_FLDL]
        add_s   r0,r0,32
#else /* !__ARC700__ */
# FIX ME: set up loop according to cache line size
        ; shift  = bits 16..19 of D_CACHE_BUILD
        ; count  = (((bufsiz - 1) >> 4) >> shift) + 1
        ; stride = 16 << shift
        ; r0 starts at buf - 16 and is advanced by stride before each write.
        lr      r12,[D_CACHE_BUILD]
        sub_s   r0,r0,16
        sub_s   r1,r1,1
        lsr_s   r12,r12,16
        asr_s   r1,r1,4
        bmsk_s  r12,r12,3
        asr_s   r1,r1,r12
        add.f   lp_count,r1,1
        mov_s   r1,16
        asl_s   r1,r1,r12
        lpne    2f
        add     r0,r0,r1
        sr      r0,[DC_FLDL]
#endif /* __ARC700__ */
        ; NOTE(review): the branch at 2: targets the very next instruction.
        ; Presumably it is there so that the loop end label is followed by a
        ; separate instruction; confirm before removing it.
2:      b_s     .Lcounters_cleared
nocache:
.Lcounters_cleared:
        lr      r1,[INT_VECTOR_BASE]
        sr      r3,[CONTROL0]           ; disable timer0 interrupts (r3 is 0 once the scale check passed)
        sr      r3,[COUNT0]
        ; Copy the 8-byte stub at 1: into the vector slot.  pcl is the
        ; instruction address rounded down to a multiple of 4, so 2 is added
        ; when the ld_s itself sits at 2 mod 4 relative to .Lprofil.
0:      ld_s    r0,[pcl,1f-0b+((0b-.Lprofil) & 2)] ; 1f@GOTOFF
0:      ld_s    r12,[pcl,1f+4-0b+((0b-.Lprofil) & 2)] ; 1f@GOTOFF + 4
        st_s    r0,[r1,24]              ; timer0 uses vector3
        st_s    r12,[r1,24+4]           ; second word of the vector3 entry
        ;sr     10000,[LIMIT0]
        sr      __profile_timer_cycles,[LIMIT0]
        mov_s   r12,3                   ; enable timer interrupts; count only when not halted.
        sr      r12,[CONTROL0]
        lr      r12,[STATUS32]
        bset_s  r12,r12,1               ; allow level 1 interrupts
        flag    r12
        mov_s   r0,0
        j_s     [blink]
        .balign 4
1:      j       __profil_irq
ENDFUNC(__profil)

/* __profil_irq
   Interrupt handler reached through the stub that __profil installs.
   Increments the 16-bit counter at
       __profil_offset + (ilink1 >> 2) * 2
   using cache-bypassing (.di) accesses.  The store is skipped when the
   incremented value has bit 16 set, so a counter that has reached 0xffff
   stays there.  r0, r1 and r2 are saved on the stack and restored; the
   handler returns with j.f [ilink1].  */
FUNC(__profil_irq)
        .balign 4                       ; make final jump unaligned to avoid delay penalty
        .balign 32,0,12                 ; make sure the code spans no more than two cache lines
        nop_s
__profil_irq:
        push_s  r0
        ld      r0,[__profil_offset]
        push_s  r1
        lsr     r1,ilink1,2             ; r1 = counter index for the interrupted address
        push_s  r2
        ldw.as.di r2,[r0,r1]            ; r2 = counter; .as scales the index by the halfword size
        add1    r0,r0,r1                ; r0 = address of that counter (r0 + r1 * 2)
        ld_s    r1,[sp,4]               ; restore r1 early; its stack slot is dropped below
        add_s   r2,r2,1
        bbit1   r2,16,nostore           ; counter was already 0xffff: leave it unchanged
        stw.di  r2,[r0]
nostore: ld.ab  r2,[sp,8]               ; restore r2, then pop both the r2 and r1 slots
        pop_s   r0
        j.f     [ilink1]
ENDFUNC(__profil_irq)

; could save one cycle if the counters were allocated at link time and
; the contents of __profil_offset were pre-computed at link time, like this:
#if 0
; __profil_offset needs to be PROVIDEd as __profile_base-text/4
        .global __profil_offset
        .balign 4
__profil_irq:
        push_s  r0
        lsr     r0,ilink1,2
        add1    r0,__profil_offset,r0
        push_s  r1
        ldw.di  r1,[r0]
        add_s   r1,r1,1
        bbit1   r1,16,nostore
        stw.di  r1,[r0]
nostore: pop_s  r1
        pop_s   r0
        j       [ilink1]
#endif /* 0 */