modtst 算法说明:offset 取值 0 到 19,步长 MOD_SZ 为 20(即每 20 个字中选 1 个字作为被测地址)
1. 从起始地址加 offset 开始,每隔 20 个字写入原始 data p1(即以 offset 方式初始化背景数)
2. 其余地址(不在 offset 位置上的字)写入原始 data 的取反值 p2
3. 重复步骤 2,共 iter 次(1 次或多次)
4. 读出各 offset 地址的内容,与 p1 比对资料是否正确
/*
 * Test all of memory using modulo X access pattern.
 *
 * The function makes three passes over every segment (0 .. segs-1). Each
 * pass walks a segment in sub-chunks of at most SPINSZ ulong elements so
 * that do_tick() and BAILR run between sub-chunks:
 *   1. store p1 into every MOD_SZ-th element, starting at start+offset;
 *   2. "iter" times, store p2 into every element whose running index
 *      (counted from the segment start, wrapping after 19) differs from
 *      offset;
 *   3. re-read every MOD_SZ-th element and call error() for each one that
 *      does not equal p1.
 *
 * Parameters:
 *   offset - element index inside each group of MOD_SZ elements that
 *            receives p1; phase 2 compares it against a counter that runs
 *            0..19, so it is presumably expected to be in that range.
 *   iter   - number of times phase 2 is repeated.
 *   p1     - pattern written in phase 1 and verified in phase 3.
 *   p2     - value written everywhere else in phase 2 (the in-code comment
 *            calls it the pattern complement; the caller presumably
 *            passes ~p1 -- confirm).
 *   me     - forwarded to calculate_chunk() and do_tick() and compared
 *            with mstr_cpu; presumably the index of the calling CPU.
 *
 * NOTE(review): the inline assembly uses 32-bit registers and hard-codes a
 * stride of 80 bytes and a wrap at 19, which only matches the reference C
 * loops if MOD_SZ is 20 and ulong is 4 bytes wide -- confirm before
 * changing either.
 * NOTE(review): the assembly labels (L50, L60, L70, ...) are plain global
 * labels; they would presumably collide if the compiler ever emitted this
 * function body more than once -- confirm.
 */
void modtst(int offset, int iter, ulong p1, ulong p2, int me)
{
int j, k, l, done;
ulong *p;
ulong *pe;
ulong *start, *end;
/* Display the current pattern */
/* Only the caller whose "me" equals mstr_cpu prints p1 and the offset. */
if (mstr_cpu == me) {
hprint(LINE_PAT, COL_PAT-2, p1);
cprint(LINE_PAT, COL_PAT+6, "-");
dprint(LINE_PAT, COL_PAT+7, offset, 2, 1);
}
/* Write every nth location with pattern */
for (j=0; j<segs; j++) {
/* Fetch the bounds for segment j into start/end (helper defined elsewhere). */
calculate_chunk(&start, &end, me, j, 4);
end -= MOD_SZ; /* adjust the ending address */
pe = (ulong *)start;
p = start+offset;
done = 0;
do {
do_tick(me);
/* BAILR is a macro defined elsewhere; presumably it returns early on abort. */
BAILR
/* Check for overflow */
/* Advance the sub-chunk end by SPINSZ elements unless the pointer would
 * wrap around (or is null), in which case fall back to the segment end. */
if (pe + SPINSZ > pe && pe != 0) {
pe += SPINSZ;
} else {
pe = end;
}
/* Clamp to the segment end and flag this as the last sub-chunk. */
if (pe >= end) {
pe = end;
done++;
}
/* Nothing left to write in this segment. */
if (p == pe ) {
break;
}
/* Original C code replaced with hand tuned assembly code
* for (; p <= pe; p += MOD_SZ) {
* *p = p1;
* }
*/
/* Register use: %edi = p (in/out), %edx = pe, %eax = p1.
 * Each iteration stores %eax at (%edi) and advances %edi by 80 bytes; the
 * loop repeats while %edi is below %edx (unsigned compare), so at least one
 * store is always performed. The final %edi is written back to p, so the
 * next sub-chunk continues where this one stopped.
 * NOTE(review): the loop condition is "below" (jb) whereas the reference C
 * above uses "<=" -- confirm this difference is intended.
 * NOTE(review): the statement stores to memory but lists no "memory"
 * clobber -- confirm against the GCC extended-asm rules. */
asm __volatile__ (
"jmp L60\n\t" \
".p2align 4,,7\n\t" \
"L60:\n\t" \
"movl %%eax,(%%edi)\n\t" \
"addl $80,%%edi\n\t" \
"cmpl %%edx,%%edi\n\t" \
"jb L60\n\t" \
: "=D" (p)
: "D" (p), "d" (pe), "a" (p1)
);
} while (!done);
}
/* Write the rest of memory "iter" times with the pattern complement */
for (l=0; l<iter; l++) {
for (j=0; j<segs; j++) {
calculate_chunk(&start, &end, me, j, 4);
pe = (ulong *)start;
p = start;
done = 0;
/* k counts the element position within the current group; it is carried
 * across sub-chunks through the asm's "b" operand. */
k = 0;
do {
do_tick(me);
BAILR
/* Check for overflow */
if (pe + SPINSZ > pe && pe != 0) {
pe += SPINSZ;
} else {
pe = end;
}
/* Clamp to the segment end and flag this as the last sub-chunk. */
if (pe >= end) {
pe = end;
done++;
}
if (p == pe ) {
break;
}
/* Original C code replaced with hand tuned assembly code
* for (; p <= pe; p++) {
* if (k != offset) {
* *p = p2;
* }
* if (++k > MOD_SZ-1) {
* k = 0;
* }
* }
*/
/* Register use: %edi = p, %edx = pe, %eax = p2, %ebx = k (in/out),
 * %ecx = offset. For each element the store of %eax is skipped when
 * %ebx == %ecx; %ebx is then incremented and reset to 0 once it exceeds 19.
 * %edi advances 4 bytes per element while it is below %edx, so the element
 * at pe itself is processed before the loop exits.
 * NOTE(review): %edi is modified by the asm but is declared only as an
 * input operand; GCC's extended-asm rules presumably require it to be an
 * output or read-write operand -- confirm.
 * NOTE(review): no "memory" clobber is listed although the asm stores to
 * memory -- confirm. */
asm __volatile__ (
"jmp L50\n\t" \
".p2align 4,,7\n\t" \
"L54:\n\t" \
"addl $4,%%edi\n\t" \
"L50:\n\t" \
"cmpl %%ebx,%%ecx\n\t" \
"je L52\n\t" \
"movl %%eax,(%%edi)\n\t" \
"L52:\n\t" \
"incl %%ebx\n\t" \
"cmpl $19,%%ebx\n\t" \
"jle L53\n\t" \
"xorl %%ebx,%%ebx\n\t" \
"L53:\n\t" \
"cmpl %%edx,%%edi\n\t" \
"jb L54\n\t" \
: "=b" (k)
: "D" (p), "d" (pe), "a" (p2),
"b" (k), "c" (offset)
);
/* The asm does not return its final pointer, so resume just past pe. */
p = pe + 1;
} while (!done);
}
}
/* Now check every nth location */
for (j=0; j<segs; j++) {
calculate_chunk(&start, &end, me, j, 4);
pe = (ulong *)start;
p = start+offset;
done = 0;
end -= MOD_SZ; /* adjust the ending address */
do {
do_tick(me);
BAILR
/* Check for overflow */
if (pe + SPINSZ > pe && pe != 0) {
pe += SPINSZ;
} else {
pe = end;
}
/* Clamp to the segment end and flag this as the last sub-chunk. */
if (pe >= end) {
pe = end;
done++;
}
if (p == pe ) {
break;
}
/* Original C code replaced with hand tuned assembly code
* for (; p <= pe; p += MOD_SZ) {
* if ((bad=*p) != p1) {
* error((ulong*)p, p1, bad);
* }
* }
*/
/* Register use: %edi = p (in/out), %edx = pe, %eax = p1, %ecx = value read
 * (declared as a clobber). Each iteration loads (%edi) and compares it with
 * %eax. On a mismatch the code saves %edx, pushes %ecx, %eax and %edi (in
 * that order) and calls error, then pops the four values back and resumes
 * the scan; the push order presumably corresponds to
 * error(address, expected, actual) under a stack-based calling convention,
 * as in the reference C above -- confirm. %edi advances 80 bytes per
 * iteration while it is below %edx. */
asm __volatile__ (
"jmp L70\n\t" \
".p2align 4,,7\n\t" \
"L70:\n\t" \
"movl (%%edi),%%ecx\n\t" \
"cmpl %%eax,%%ecx\n\t" \
"jne L71\n\t" \
"L72:\n\t" \
"addl $80,%%edi\n\t" \
"cmpl %%edx,%%edi\n\t" \
"jb L70\n\t" \
"jmp L73\n\t" \
"L71:\n\t" \
"pushl %%edx\n\t"
"pushl %%ecx\n\t"
"pushl %%eax\n\t"
"pushl %%edi\n\t"
"call error\n\t"
"popl %%edi\n\t"
"popl %%eax\n\t"
"popl %%ecx\n\t"
"popl %%edx\n\t"
"jmp L72\n"
"L73:\n\t" \
: "=D" (p)
: "D" (p), "d" (pe), "a" (p1)
: "ecx"
);
} while (!done);
}
}
/*
 * Simplified plain-C outline of the three passes of the modulo test:
 * write the pattern at every MOD_SZ-th element, overwrite the remaining
 * elements "iter" times, then verify the pattern elements.
 *
 * NOTE(review): this fragment sits outside any function, uses "i" which is
 * not declared anywhere in the visible source, and never initializes p or
 * pe for a segment -- it appears to be illustrative pseudo-code rather than
 * compilable code; confirm before relying on it.
 */
/* Pass 1: write the pattern at every MOD_SZ-th element. */
for(j = 0; j < segs; j++)
{
p += offset;
pe -= MOD_SZ; /* MOD_SZ = 20 */
for(; p<pe; p+=MOD_SZ)
{
*p = p1; /* p1 is the initial background pattern */
}
}
/* Pass 2: repeated "iter" times, write p2 everywhere except position "offset". */
for(i = 0; i < iter; i++)
{
for(j = 0; j<segs; j++)
{
/* k tracks the position inside the current group of MOD_SZ elements. */
k = 0;
for(;p<pe;p++)
{
if(k != offset)
{
*p = p2; /* the complement of p1 */
}
/* Wrap the position counter after MOD_SZ-1. */
if(++k > MOD_SZ-1)
{
k = 0;
}
}
}
}
/* Pass 3: re-read every MOD_SZ-th element and report any that differs from p1. */
for(j = 0; j<segs; j++)
{
p += offset;
pe -= MOD_SZ;
for(; p<pe; p+=MOD_SZ)
{
if(*p != p1)
{
error_report();
}
}
}