Chapter 7
Link to the solution:
7.1 Enumerate the areas where the compiler must introduce overhead in order to vectorize a loop.
7.2 Under what conditions might a vectorized loop run at the same speed as, or slower than, the scalar version?
7.3 Consider the restructured version of loops 41091 below. Can you create a better rewrite?
 98. + 1 2----------<        DO 41091 K = KA, KE, -1
 99. + 1 2 3--------<         DO 41091 J = JA, JE
100.   1 2 3 Vr2---<           DO 41091 I = IA, IE
101.   1 2 3 Vr2                AA(I,K,L,J) = AA(I,K,L,J)-BB(I,J,1,K)*AA(I,K+1,L,1)
102.   1 2 3 Vr2            *     - BB(I,J,2,K)*AA(I,K+1,L,2)-BB(I,J,3,K)*AA(I,K+1,L,3)
103.   1 2 3 Vr2            *     - BB(I,J,4,K)*AA(I,K+1,L,4)-BB(I,J,5,K)*AA(I,K+1,L,5)
104.   1 2 3 Vr2-->>>  41091 CONTINUE
7.4 Consider the restructured version of loops 4502* below. Can you create a better rewrite?
 74.   1 Vr2----------<        DO 45021 I = 1,N
 75.   1 Vr2                    F(I) = A(I) + .5
 76.   1 Vr2---------->  45021 CONTINUE
 77.   1
 78. + 1 f-----------<         DO 45022 J = 1, 10
 79.   1 f Vr2--------<         DO 45022 I = 1, N
 80.   1 f Vr2                   D(I,J) = B(J) * F(I)
 81.   1 f Vr2------->>  45022 CONTINUE
 82.   1
 83.   1 iV-----------<        DO 45023 K = 1, 5
 84. + 1 iV fi------<           DO 45023 J = 1, 10
 85. + 1 iV fi ir4--<            DO 45023 I = 1, N
 86.   1 iV fi ir4                C(K,I,J) = D(I,J) * E(K)
 87.   1 iV fi ir4-->>>  45023 CONTINUE
7.5 Consider the restructured version of loops 4701* below. Can you create a better rewrite?
 97.   1                 C THE RESTRUCTURED
 98. + 1 2------------<        DO 47016 K = 2, N - 1
 99. + 1 2 f----------<         DO 47013 J = 2, 3
100.   1 2 f fVr2----<           DO 47013 I = 2, N
101.   1 2 f fVr2                 A(I,J) = (1. - PX - PY - PZ) * B(I,J,K)
102.   1 2 f fVr2             1     + .5 * PX * ( B(I+1,J,K) + B(I-1,J,K) )
103.   1 2 f fVr2             2     + .5 * PY * ( B(I,J+1,K) + B(I,J-1,K) )
104.   1 2 f fVr2             3     + .5 * PZ * ( B(I,J,K+1) + B(I,J,K-1) )
105.   1 2 f fVr2-->>    47013 CONTINUE
106.   1 2
107.   1 2                      IF (K .EQ. 2) THEN
108.   1 2
109. + 1 2 f-------<             DO 47014 J =2, 3
110.   1 2 f f-----<              DO 47014 I =2, N
111.   1 2 f f                     C(I,J) = A(I,J)
112.   1 2 f f---->>     47014 CONTINUE
113.   1 2
114.   1 2                      ELSE
115.   1 2
116. + 1 2 f--------<            DO 47015 J = 2, 3
117.   1 2 f f------<             DO 47015 I = 2, N
118.   1 2 f f                     B(I,J,K-1) = C(I,J)
119.   1 2 f f                     C(I,J) = A(I,J)
120.   1 2 f f---->>     47015 CONTINUE
121.   1 2
122.   1 2                      ENDIF
123.   1 2
124.   1 2-------->      47016 CONTINUE
125.   1                       K = N
126. + 1 ir2---------<         DO 47017 I = 2, N
127. + 1 ir2 iw-----<           DO 47017 J = 2, 3
128.   1 ir2 iw                  A(I,J) = (1. - PX - PY - PZ) * B(I,J,K)
129.   1 ir2 iw               1    + .5 * PX * ( B(I+1,J,K) + B(I-1,J,K) )
130.   1 ir2 iw               2    + .5 * PY * ( B(I,J+1,K) + B(I,J-1,K) )
131.   1 ir2 iw               3    + .5 * PZ * ( B(I,J,K+1) + B(I,J,K-1) )
132.   1 ir2 iw                  B(I,J,K) = A(I,J)
133.   1 ir2 iw                  B(I,J,K-1) = C(I,J)
134.   1 ir2 iw                  C(I,J) = A(I,J)
135.   1 ir2 iw--->>     47017 CONTINUE