|
本帖最后由 Vendicare 于 2012-4-8 23:01 编辑
66666 发表于 2012-4-7 09:18 ![]()
PS3和XO没有一个游戏是真正直接利用汇编操作GPU。没有openGL es和DX中间层你游戏机一个游戏都玩不了。就是 ...
你是一行代码都没写过吧?现代计算机程序里没有什么软件是完全用ASM写的,但也没有几个游戏完全不用ASM。全用ASM写的程序没有可维护性,几十KB的程序过几年连开发者自己都看不明白了;完全不用ASM,效率又偏低,用户体验不太好。截至目前,完全没用ASM的PS3游戏估计还没出现,完全用ASM写成的PS2游戏也不存在。
不管是SSE还是SPU这些SIMD系统都没法靠编译器自动优化(自动优化性能提升几乎可以忽略)。除非有能够自动理解上下文语义的超级编译器出现,否则ASM永远都是编写程序中性能敏感部分的主要工具之一。
下面是一段充分发挥SPE性能的代码示例(要是什么东西都能用C++解决,程序员就不这么苦逼了):
.section .data
# Parameter block that main fills by DMA from the main PPE process.
# It consists of two consecutive 16-byte slots, both initially zero:
#   conversion_length - passed to perform_dma as the transfer size
#   conversion_data   - passed to perform_dma as the 64-bit effective address
# The block starts on a 16-byte boundary so each slot can be fetched
# with a single lqr.
.equ CONVERSION_STRUCT_SIZE, 32        # bytes: two 16-byte slots
.balign 16
conversion_info:
conversion_length:
    .quad 0, 0
conversion_data:
    .quad 0, 0
.section .bss #Uninitialized Data Section
# Local-store staging buffer that main DMAs the string into, converts in
# place, and DMAs back out.
#
# The buffer is declared as an ordinary label followed by .skip instead of
# with .lcomm: .lcomm performs its own allocation in .bss, so an alignment
# directive written in front of it is not guaranteed to be what positions
# the symbol. With a plain label the alignment request below applies
# directly to conversion_buffer.
.equ CONVERSION_BUFFER_SIZE, 16384     # bytes
.balign 16
.type conversion_buffer, @object
conversion_buffer:
    .skip CONVERSION_BUFFER_SIZE
.size conversion_buffer, CONVERSION_BUFFER_SIZE
.section .text
# Byte offset from $sp of the slot where main saves and reloads $lr.
.equ LR_OFFSET, 16
# DMA command codes handed to perform_dma in $7:
#   PUT is used by main to copy the buffer back out,
#   GET is used by main to copy data in.
.equ MFC_PUT_CMD, 0x20
.equ MFC_GET_CMD, 0x40
.global main
.type main, @function
.equ MAIN_FRAME_SIZE, 32            # bytes of stack frame opened by main
.equ MAIN_DMA_TAG, 0                # tag used for every transfer main issues
.equ MAIN_MAX_TRANSFER, 16384       # must equal the size of conversion_buffer
.equ MAIN_EXIT_OK, 0
.equ MAIN_EXIT_BAD_LENGTH, 1
#-----------------------------------------------------------------------
# main
#   1. DMAs the conversion_info parameter block into local store,
#   2. DMAs the string it describes into conversion_buffer,
#   3. calls convert_buffer_to_upper on the buffer,
#   4. DMAs the buffer back to the address the string came from.
#
# In:    $4 = 64-bit effective address of the parameter block. main never
#             sets it before the first transfer, so it must be valid on
#             entry.
# Out:   $3 = MAIN_EXIT_OK on success, or MAIN_EXIT_BAD_LENGTH when
#             conversion_length exceeds MAIN_MAX_TRANSFER; in that case the
#             string is neither fetched, converted, nor written back.
# Calls: perform_dma($3 = local store address, $4 = effective address,
#                    $5 = size, $6 = tag, $7 = command)
#        wait_for_dma_completion($3 = MAIN_DMA_TAG; presumably the tag to
#                    wait on - confirm against its definition)
#        convert_buffer_to_upper($3 = buffer address, $4 = length)
# Notes: every argument register is reloaded before each call instead of
#        being assumed to survive the previous one.
#-----------------------------------------------------------------------
main:
    # Prologue: save the return address, store the back chain, open the frame.
    stqd    $lr, LR_OFFSET($sp)
    stqd    $sp, -MAIN_FRAME_SIZE($sp)
    ai      $sp, $sp, -MAIN_FRAME_SIZE

    ## Copy in the conversion parameter block ##
    ila     $3, conversion_info             # local store address
    # $4 is deliberately left alone: it already holds the effective address.
    il      $5, CONVERSION_STRUCT_SIZE      # transfer size
    il      $6, MAIN_DMA_TAG                # DMA tag
    il      $7, MFC_GET_CMD                 # DMA command
    brsl    $lr, perform_dma

    il      $3, MAIN_DMA_TAG
    brsl    $lr, wait_for_dma_completion

    ## Reject lengths that do not fit in conversion_buffer ##
    # The length comes from the other processor and is used unmodified as a
    # DMA size into a fixed 16384-byte buffer, so an oversized value would
    # overrun local store. The compare is unsigned, which also rejects
    # values that would be negative as signed integers.
    # Assumes the length is the 32-bit value in the preferred word of
    # conversion_length, i.e. the same word handed to perform_dma as the
    # size - TODO confirm against the PPE-side structure definition.
    lqr     $5, conversion_length
    il      $8, MAIN_MAX_TRANSFER
    clgt    $9, $5, $8                      # all ones when length > limit
    il      $3, MAIN_EXIT_BAD_LENGTH        # return value if the check fails
    brnz    $9, .Lmain_epilogue

    ## Copy the string into the buffer ##
    ila     $3, conversion_buffer           # local store address
    lqr     $4, conversion_data             # 64-bit effective address
    lqr     $5, conversion_length           # size
    il      $6, MAIN_DMA_TAG                # DMA tag
    il      $7, MFC_GET_CMD                 # DMA command
    brsl    $lr, perform_dma

    il      $3, MAIN_DMA_TAG
    brsl    $lr, wait_for_dma_completion

    ## Perform the conversion in place ##
    ila     $3, conversion_buffer
    lqr     $4, conversion_length
    brsl    $lr, convert_buffer_to_upper

    ## Copy the converted data back ##
    ila     $3, conversion_buffer           # local store address
    lqr     $4, conversion_data             # 64-bit effective address
    lqr     $5, conversion_length           # size
    il      $6, MAIN_DMA_TAG                # DMA tag
    il      $7, MFC_PUT_CMD                 # DMA command
    brsl    $lr, perform_dma

    il      $3, MAIN_DMA_TAG
    brsl    $lr, wait_for_dma_completion

    ## Exit program ##
    il      $3, MAIN_EXIT_OK                # return value

.Lmain_epilogue:
    # Epilogue: close the frame, restore the return address, return.
    # $3 already holds the return value on both paths that reach here.
    ai      $sp, $sp, MAIN_FRAME_SIZE
    lqd     $lr, LR_OFFSET($sp)
    bi      $lr
|