#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
const int MAXLOG = 18;
int N, K, Q, T;
vector<int> par;
vector<vector<int>> adj;
vector<vector<int>> jmp, dist;
vector<int> sub, dep;
vector<pair<int,int>> que;
int root;
void dfs(int v) {
if(sub.size() < K) {
sub.push_back(v);
if (v != root) {
que.push_back({par[v], v});
dep[v] = dep[par[v]] + 1;
}
}
for(int &u: adj[v]) {
dfs(u);
}
}
signed main() {
cin.tie(0); ios::sync_with_stdio(false);
cin >> N >> K >> Q >> T;
par.resize(N), adj.resize(N), dep.resize(N), jmp.resize(MAXLOG, vector<int>(N)), dist.resize(MAXLOG, vector<int>(N));
for(int i = 0; i < N; i++) {
cin >> par[i];
par[i]--;
if (par[i] == -2) root = i;
else adj[par[i]].push_back(i);
}
for(int i = 0; i < N; i++) {int x; cin >> x;}
dfs(root);
for(int &i: sub) cout << i+1 << ' ';
cout << '\n';
for(auto &[a, b]: que) cout << "? " << a+1 << ' ' << b+1 << '\n';
cout << '!' << endl;
jmp[0][root] = root;
for(int i = 0; i < que.size(); i++) {
int a, b, c, d;
cin >> a >> b >> c >> d;
jmp[0][que[i].second] = que[i].first;
dist[0][que[i].second] = b;
}
for(int j = 1; j < MAXLOG; j++) {
for(int &i: sub) {
dist[j][i] = dist[j-1][jmp[j-1][i]] + dist[j-1][i];
jmp[j][i] = jmp[j-1][jmp[j-1][i]];
}
}
auto lca = [&](int a, int b) {
if (dep[a] > dep[b]) swap(a, b);
int re = 0;
int j = MAXLOG - 1;
while(j >= 0) {
if (dep[b] - (1 << j) >= dep[a]) {
re += dist[j][b];
b = jmp[j][b];
}
j--;
}
if (a == b) return re;
j = MAXLOG - 1;
while(j >= 0) {
if (jmp[j][a] != jmp[j][b]) {
re += dist[j][a] + dist[j][b];
a = jmp[j][a], b = jmp[j][b];
}
j--;
}
return re + dist[0][a] + dist[0][b];
};
for(int i = 0; i < T; i++) {
int a, b; cin >> a >> b; a--, b--;
int x= lca(a, b);
cout << x << ' ' << x << '\n';
}
cout << endl;
}