This submission is migrated from previous version of oj.uz, which used different machine for grading. This submission may have different result if resubmitted.
#include <bits/stdc++.h>
using namespace std;
using ll = long long; 
#define pb push_back
#define all(x) begin(x), end(x)
#define sz(x) (int) (x).size()
#define f first
#define s second
#define pii pair<int, int>
template <typename T> class SparseTable {
  private:
    int n, log2dist; 
    vector<vector<T>> st;
  public:
    // build o(nlogn) query o(1)
    SparseTable(const vector<T> &v) {
        n = (int)v.size();
        log2dist = 1 + (int)log2(n);
        st.resize(log2dist);
        st[0] = v;
        for (int i = 1; i < log2dist; i++) {
            st[i].resize(n - (1 << i) + 1);
            for (int j = 0; j + (1 << i) <= n; j++) {
                st[i][j] = min(st[i - 1][j], st[i - 1][j + (1 << (i - 1))]);
            }
        }
    }
    // minimum on 0-indexed range [l, r]
    T query(int l, int r) {
        int i = (int)log2(r - l + 1);
        return min(st[i][l], st[i][r - (1 << i) + 1]);
    }
};
class LCA {
  private:
    const int n;
    const vector<vector<int>> &adj;
    SparseTable<pair<int, int>> rmq;
    vector<int> tin, et, dep;
    int timer = 0;
    // prepares tin, et, dep vectors
    void dfs(int u, int p) {
        tin[u] = timer;
        et[timer++] = u;
        for (int v : adj[u]) {
            if (v == p) { continue; }
            dep[v] = dep[u] + 1;
            dfs(v, u);
            et[timer++] = u;
        }
    }
  public:
    // build o(nlogn) query o(1)
    // make sure the adjacency list is 0 indexed
    LCA(vector<vector<int>> &_adj)
        : n((int)_adj.size()), adj(_adj), tin(n), et(2 * n), dep(n),
          rmq(vector<pair<int, int>>(1)) {
        dfs(0, -1);
        vector<pair<int, int>> arr(2 * n);
        for (int i = 0; i < 2 * n; i++) { arr[i] = {dep[et[i]], et[i]}; }
        rmq = SparseTable<pair<int, int>>(arr);
    }
    // LCA of 0-indexed nodes a and b
    int query(int a, int b) {
        if (tin[a] > tin[b]) { swap(a, b); }
        return rmq.query(tin[a], tin[b]).second;
    }
};
template <class T> class BIT {
  private:
    int n;
    vector<T> bit;
    vector<T> arr;
  public:
    BIT(int n) : n(n), bit(n + 1), arr(n) {}
    void set(int k, T val) { add(k, val - arr[k]); }
    void add(int k, T val) {
        arr[k] += val;
        k++;
        for (; k <= n; k += k & -k) { bit[k] += val; }
    }
    T query(int a, int b) {
        b++;
        T s = 0;
        for (; a > 0; a -= a & -a) { s -= bit[a]; }
        for (; b > 0; b -= b & -b) { s += bit[b]; }
        return s;
    }
};
const int N = 1e5 + 5; 
int n, m, k, timer = -1; 
BIT<int> bit(N); 
int d[N]; 
int id[N]; 
int out[N]; 
int rev[N]; 
vector<pii> e; 
vector<int> t[N], ans; 
vector<vector<int>> adj; 
void dfs(int node, int prev){
    id[node] = ++timer;
    for(auto x : adj[node]){
        if(x != prev){
            d[x] = d[node] + 1; 
            dfs(x, node); 
        }
    }
    out[node] = timer; 
}
int32_t main() {
    cin.tie(0)->sync_with_stdio(0);
    cin >> n >> m >> k; 
    adj.resize(n); 
    for(int i = 0; i < n - 1; i++){
        int a, b; 
        cin >> a >> b; a--; b--; 
        adj[a].push_back(b); 
        adj[b].push_back(a); 
        e.push_back({a, b}); 
    }
    dfs(0, -1); 
    LCA lca(adj); 
    for(int i = 0; i < m; i++){
        int x; 
        cin >> x; 
        for(int j = 0; j < x; j++){
            int a; 
            cin >> a; a--; 
            t[i].push_back(a); 
        }
        sort(all(t[i]), [](int &a, int &b){
            return id[a] < id[b]; 
        }); 
        t[i].push_back(t[i][0]); 
        for(int j = 0; j < x; j++){
            int anc = lca.query(t[i][j], t[i][j + 1]); 
            bit.add(id[t[i][j]], 1); 
            bit.add(id[t[i][j + 1]], 1); 
            bit.add(id[anc], -2); 
        }
    }
    for(int i = 0; i < n - 1; i++){
        if(d[e[i].f] > d[e[i].s]){
            swap(e[i].f, e[i].s); 
        }
        rev[e[i].s] = i + 1; 
    }
    for(int i = 1; i < n; i++){
        if(bit.query(id[i], out[i]) >= 2 * k){
            ans.push_back(rev[i]); 
        }
    }
    cout << sz(ans) << '\n'; 
    for(auto x : ans){
        cout << x << ' '; 
    }
    return 0; 
}
Compilation message (stderr)
railway.cpp: In constructor 'LCA::LCA(std::vector<std::vector<int> >&)':
railway.cpp:41:26: warning: 'LCA::dep' will be initialized after [-Wreorder]
   41 |     vector<int> tin, et, dep;
      |                          ^~~
railway.cpp:40:33: warning:   'SparseTable<std::pair<int, int> > LCA::rmq' [-Wreorder]
   40 |     SparseTable<pair<int, int>> rmq;
      |                                 ^~~
railway.cpp:57:5: warning:   when initialized here [-Wreorder]
   57 |     LCA(vector<vector<int>> &_adj)
      |     ^~~| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... |