\documentclass[letterpaper,12pt]{article}

% to do:
%   \fdvector and \fdmatrix
%   postscript fonts?
%   where's distiller?
% ``times'' is no longer an option for \documentclass?
% use fleqn as an option?  \setlength{\mathindent{0.25in}} in preamble
% use titlepage option?
% extra \nocite{blah}s?
% split bibliography .bib's into topic files?
% change \Gamma_D to a macro that shifts the ``D'' closer to the \Gamma
%  do likewise for VHh and others?


% begin preamble

%%% Packages Used:
% \usepackage{doublespace}
% could also use amssymb which contains amsfonts plus extra symbols
\usepackage{amsmath,amsfonts}
\usepackage{bm}
\usepackage[dvips]{graphicx,color}

%%% Places to look for EPS files
\graphicspath{{diagrams/}}

%%% Bibliography style
%\bibliographystyle{siam}
\bibliographystyle{abbrv}

%%% Colors
\definecolor{greyblue}{rgb}{0.35,0.4,0.62}       %89.25 102 158.10
\definecolor{GrayBlue}{rgb}{0.35,0.4,0.62}       %89.25 102 158.10
\definecolor{orangered}{RGB}{255,69,0}           % #FF4500
\definecolor{darkorange}{cmyk}{.20,.50,.80,0}    % rgb .80 .50 .20
\definecolor{lightorange}{cmyk}{.07,.37,.65,0}   % rgb .93 .63 .35
\definecolor{webyellow}{RGB}{204, 153, 000}      % #cc9900
\definecolor{darkgreen}{RGB}{51,102,0}           % #003300
\definecolor{darkpeagreen}{cmyk}{.50,.30,.50,0}  % .50 .70 .50
\definecolor{webpeagreen}{rgb}{0.80,0.80,0.60}   % RGB 204 204 153
\definecolor{firebrick}{RGB}{178,34,34}          % #B22222
\definecolor{ltblue}{RGB}{176,196,222}           % #B0D4CE
\definecolor{seagreen}{RGB}{46,139,87}           % #2E8B57
\definecolor{seafoamgreen}{rgb}{0.5372,0.7059,0.5020}  %137,180,128 /255
\definecolor{SeafoamGreen}{rgb}{0.5372,0.7059,0.5020}  %137,180,128 /255
\definecolor{lightpeagreen}{cmyk}{.22,.20,.40,0} % rgb: 0.78 0.80 0.60
\definecolor{Orange}{cmyk}{.20,.50,.80,0}        % rgb .80 .50 .20
   \definecolor{mygreen}{rgb}{0.20,0.60,0.40}
   \definecolor{paledullorange}{rgb}{1.00,0.80,0.60}
   \definecolor{obscureweakblue}{rgb}{0.00,0.00,0.20}
   \definecolor{cdsbrown}{RGB}{65,12,9}
   \definecolor{cdspeach}{RGB}{245,210,168}
   \definecolor{cdsorange}{RGB}{239,140,47}
   \definecolor{cdsteal}{RGB}{10,80,88}
   \definecolor{cdsdarkblue}{RGB}{19,8,48}
   \definecolor{cdspaleorange}{cmyk}{0.0,0.15,0.32,0.20}
   \definecolor{bo}{rgb}{0.87451,0.439216,0.137255}

%%% Extra Formatting Macros
% we might need to define \undertilde{arg} to be \stackrel{arg}{~}
%   or \underset{arg}{~} (is ~ special?).  For ``div'' and ``grad'' we want
%   \DeclareMathOperator{\div}{div} and the same for grad (but we want to
%   indicate that it's a vector...)
\DeclareMathOperator{\diver}{div}
\DeclareMathOperator{\grad}{\mathbf{grad}}
%\newcommand{\undertilde}[1]{\utilde{#1}}
%\newcommand{\undertilde}[1]{\underset{\~ }{#1}}
\newcommand{\undertilde}[1]{\underset{\sim}{#1}}
%\newcommand{\undertilde}[1]{\underset{\textasciitilde}{#1}}
% finite-dimensional matrices and vectors
% I would use \matrix and \vector, but those are already used
\newcommand{\fdmatrix}[1]{\ensuremath{\mathrm{\mathbf{#1}}}}
\newcommand{\fdvector}[1]{\ensuremath{\mathrm{\mathbf{#1}}}}

%%% Notation Shortcuts
\newcommand{\dlp}{\ensuremath{\delta p}}
\newcommand{\dlpbar}{\ensuremath{\delta \bar{p}}}
\newcommand{\dlptwid}{\ensuremath{\delta \tilde{p}}}
\newcommand{\dlptwidi}{\ensuremath{\delta \tilde{p}_i}}
\newcommand{\dlphat}{\ensuremath{\delta \hat{p}}}
\newcommand{\dlphatj}{\ensuremath{\delta \hat{p}_j}}
\newcommand{\dlw}{\ensuremath{\delta w}}
\newcommand{\dlW}{\ensuremath{\delta W}}
\newcommand{\DlW}{\ensuremath{\Delta W}}
\newcommand{\dlu}{\ensuremath{\delta \mathbf{u}}}
\newcommand{\dlubar}{\ensuremath{\delta \mathbf{\bar{u}}}}
\newcommand{\dlutwid}{\ensuremath{\delta \mathbf{\tilde{u}}}}
\newcommand{\dlutwidi}{\ensuremath{\delta \mathbf{\tilde{u}}_i}}
\newcommand{\dluhat}{\ensuremath{\delta \mathbf{\hat{u}}}}
\newcommand{\dluhatj}{\ensuremath{\delta \mathbf{\hat{u}}_j}}
\newcommand{\dlv}{\ensuremath{\delta \mathbf{v}}}
\newcommand{\dlV}{\ensuremath{\delta \mathbf{V}}}
\newcommand{\DlV}{\ensuremath{\Delta \mathbf{V}}}
\newcommand{\pc}{\ensuremath{p_c}}
\newcommand{\wc}{\ensuremath{w_c}}
\newcommand{\Wc}{\ensuremath{W_c}}
\newcommand{\Wsc}{\ensuremath{W_{sc}}}
\newcommand{\uc}{\ensuremath{\mathbf{u}_c}}
\newcommand{\vc}{\ensuremath{\mathbf{v}_c}}
\newcommand{\Vc}{\ensuremath{\mathbf{V}_c}}
\newcommand{\Vsc}{\ensuremath{\mathbf{V}_{sc}}}
\newcommand{\vgn}{\ensuremath{\mathbf{v}_{g_N}}}
\newcommand{\divdot}{\ensuremath{\mathbf{\nabla} \cdot}}
\newcommand{\Wh}{\ensuremath{W_h}}
\newcommand{\Vh}{\ensuremath{\mathbf{V}_h}}
\newcommand{\WHh}{\ensuremath{W_{H,h}}}
\newcommand{\VHh}{\ensuremath{\mathbf{V}_{H,h}}}
\newcommand{\VtwidHh}{\ensuremath{\mathbf{\tilde{V}}_{H,h}}}
\newcommand{\WH}{\ensuremath{W_H}}
\newcommand{\VH}{\ensuremath{\mathbf{V}_H}}
\newcommand{\VtwidH}{\ensuremath{\mathbf{\tilde{V}}_H}}
\newcommand{\dlWh}{\ensuremath{\delta W_h}}
\newcommand{\dlVh}{\ensuremath{\delta \mathbf{V}_h}}
\newcommand{\pHh}{\ensuremath{p_{H,h}}}
\newcommand{\uHh}{\ensuremath{\mathbf{u}_{H,h}}}
\newcommand{\utwidHh}{\ensuremath{\mathbf{\tilde{u}}_{H,h}}}
\newcommand{\ph}{\ensuremath{p_h}}
\newcommand{\uh}{\ensuremath{\mathbf{u}_h}}
\newcommand{\vh}{\ensuremath{\mathbf{v}_h}}
\newcommand{\bvec}[1]{\ensuremath{\bm{#1}}}
\newcommand{\abs}[1]{\ensuremath{\lvert #1 \rvert}}
\newcommand{\norm}[1]{\ensuremath{\lVert #1 \rVert}}


%\setlength{\parindent}{0pt}
%\setlength{\parskip}{1ex plus 0.5ex minus 0.2ex}
%\pagestyle{plain}

% end preamble

\begin{document}

\pagenumbering{roman}
%-------------------------------------------
\begin{titlepage}
%-------------------------------------------
%\thispagestyle{empty}

\begin{center}
A CAM Dissertation Proposal on \\
\vspace{0.125in}
{\Large Subgrid Upscaling Preconditioners for Darcy Flow} \\
\vspace{0.5in}
James M.\ Rath \\
\vspace{0.5in}
\begin{tabular}{l}
Committee: \\
Todd J.\ Arbogast (advisor), Mathematics \\
Steven L.\ Bryant, Petroleum and Geosystems Engineering \\
Clinton N.\ Dawson, Aerospace Engineering and Engineering Mechanics \\
Robert A.\ van~de~Geijn, Computer Sciences \\
Jack Xin, Mathematics \\
%``Your name here!'' \\
\end{tabular} \\
\vspace{2.5in}
Institute for Computational Engineering and Sciences \\
The University of Texas at Austin \\
August 2003
\end{center}

\newpage

\begin{abstract}
The proposed research for the Computational and Applied Mathematics Ph.~D. degree
is to use innovative improvements to and applications of a variational
multiscale subgrid upscaling procedure to greatly reduce the time needed to
simulate Darcy flow problems.  Additionally, these techniques are sufficiently
abstract to be applied to other model physical problems with likely success.

Simulation of flow through a porous medium on a large scale can be
computationally expensive if the flow is resolved at a fine scale.  Numerous
techniques have been proposed to ``upscale'' the problem so that computations
can be performed on a coarser scale but still retain information about the
fine-scale flow and problem data.  We use one kind of upscaling --- variational
multiscale subgrid upscaling --- to construct preconditioners for solving the
full fine-scale problem.

In one preconditioning method, multigrid techniques are used to form a two-level
iterative scheme with a fine-scale smoother as one level, and the upscaling
preconditioner as the second level.  Like multigrid, this scheme has a
convergence factor dependent on both the permeability field (the ratio of
maximum to minimum values) and the relative resolution of the fine and coarse
grids.  However, the upscaling preconditioner is much more effective at
capturing the correct coarse-scale behavior than a coarse-scale smoother (as
used in multigrid).  Further, the upscaling preconditioner has an operation
count similar to that of multigrid, and the extra subgrid computations exhibit
perfect parallelism.

In another method, the coarse basis for the variational multiscale subgrid
upscaling is modified to allow the possibility of the upscaled solution
coinciding with the full fine-scale solution.  Starting with the ordinary
upscaling solution, iterative nonlinear optimization techniques are applied to
update the basis for the upscaling problem.  Under suitable conditions, the
modified upscaled solution will converge superlinearly to the fine-scale
solution.  The method only requires solving upscaled problems (an
``inexpensive'' operation) and some matrix-vector multiplies to find the
residual of the fine-scale problem.

The proposed research is multidisciplinary and requires developments and
understanding in mathematics, computer science, and engineering.  Although there
is a degree of overlap between the problems studied and methods used in these
disciplines, some goals specific to each area are listed below.

\textbf{Area A:} A-priori error analysis of finite element methods as applied to
linear elliptic partial differential equations will be a necessary element to
show convergence of the proposed methods.  For the two-level method we need to
develop error estimates for the smoothing and approximation properties.  For the
nonlinear method we need to find the relationship between the error in the extra
degrees of freedom and the error in the related modified upscaled problem.  An
appropriate norm for the error in the extra degrees of freedom is needed
(perhaps they can be viewed as traces of fluxes on coarse cell boundaries).

\textbf{Area B:} Research of analysis of convergence of iterative schemes for
solving large, sparse linear systems is necessary.  Both methods use Schur
complement techniques (on both subgrid problems and velocities), and
(preconditioned) conjugate gradient and Krylov ideas.  The two-level scheme
requires analysis of smoothers and the performance of a multigrid-like scheme.
The nonlinear method requires study of the details of Newton, quasi-Newton, and
secant methods.  For each of these three, the problem of global convergence
needs to be addressed.  For the quasi-Newton methods, there is also the choice
of approximation to the Jacobian and how it is factored.  For the secant method
there are the issues of QR updates, the choice of initialization of the
approximate Jacobian, and the bounded deterioration of it.  Consideration needs
to be made of high performance computing and parallelism concerns when designing
and implementing algorithms.

\textbf{Area C:} Efficient, accurate Darcy flow simulations are the prime
motivation for this research.  We need to detail the performance of the proposed
algorithms in different geostatistical settings to guide practitioners' use.  We
need to be able to say how quickly and how well we can approximate large-scale
features of the flow.  We want to know if long-range correlations in the problem
data present any theoretical or practical difficulties for the algorithms, and
be able to present some engineering conclusions about the effect of such
correlations on flow.  We need to provide a well-documented implementation of
the proposed algorithms in parallel for three dimensional problems (and perhaps
extend it to handle two-phase miscible and/or immiscible displacements).  It
should be able to model both rate wells and Peaceman bottom-hole-pressure wells.
\end{abstract}

\newpage
%----------------------------------------------
\setcounter{tocdepth}{1}
\tableofcontents
%----------------------------------------------
\newpage
%----------------------------------------------
\listoffigures
%\listoftables
%----------------------------------------------

\end{titlepage}
\pagenumbering{arabic}


%----------------------------------------------
\section{Introduction}
\label{sec:intro}
%----------------------------------------------

Darcy's law describes fluid flow through a porous medium.  It is an empirical
law that asserts bulk flow of a fluid through the medium is proportional to the
gradient of the pressure across the medium (accounting for hydrostatic
differences from gravity)~\cite{Darcy_1856,Bear_1972,Scheidegger_1974,Freeze_Cherry_1979}:
$$\fdvector{u}=-\frac{\fdmatrix{\kappa}}{\mu}\left(\fdvector{\nabla}p-\rho\fdvector{g}\right)$$
Darcy's law has found wide applicability in modeling subsurface flows, and has
been generalized to model multicomponent and multiphase flows.  (The above
differential form is itself a generalization of the relation Darcy formulated.)
Our primary interest is in using Darcy's law to model oil reservoir and
groundwater contaminant flow.

Darcy's law alone is insufficient to describe the physics: conservation of mass
(the continuity equation) and equations of state (relating density, viscosity,
and permeability to phase fraction and temperature) are necessary.  In the
applications being considered, there is often a need for the velocity to be very
accurate and to strictly (locally) observe mass conservation; hence our focus on
mixed methods~\cite{Russell_Wheeler_1983,DEW_1983,DEW_1984,ERW_1984}.
Although our presentation ignores aspects of multiphase flow, the proposed ideas
and software can readily be adapted to model such flows.  We do not account for
effects of temperature changes in the model.

\subsection{Heterogeneity and why it is a problem}

Geostatistical modeling is used to generate the necessary data (porosity and
permeability) to specify the problem to be
approximated~\cite{Deutsch_Journel_1997}.  This data is typically given at a
very fine resolution~\cite{Durlofsky_2002}, but the goal is to predict
long-range flow behavior; it is tempting, then, to approximate the problem at a
very coarse scale.  However, fine-scale features of the problem data can have
very large effects on the coarse-scale flow behavior~\cite{Durlofsky_2002}.
Therein lies one big difficulty: it is necessary to resolve the flow at very
fine scales resulting in computationally poor conditioned problems to
solve~\cite{GLMT_1985}.  Moreover, the resolution cannot be reduced to shrink
the size of the system: (1) heterogeneity in the permeability (irregular, short
spatial-scale jumps) means $p$-refinements (high-order approximations) will not
help, and (2) spatially-limited resolution and spatially-uniform heterogeneity
means $h$-refinements (coarse scaling) will not help either.

The fine-scale resolution necessary in simulations makes for poor conditioning,
yet there is still another difficulty: the jumps in the permeability can
sometimes be quite severe (several orders of magnitude changes) between nearby
locations (see for example Figure~\ref{fig:brent_logperm}).  This makes our
computation of an approximation even more poorly conditioned.  The more
heterogeneous and fine-scale the problem, the more computationally expensive it
becomes as all known direct/iterative linear solvers have behavior which worsens
with greater condition number~\cite{Golub_VanLoan_1996}.  We seek to broaden the
range of problems that are computationally feasible.

\subsection{Proposed solution techniques}

One approach used to overcome the poor-conditioning in modeling multiscale
phenomena is upscaling~\cite{Durlofsky_2002}.  In upscaling techniques, an
averaging process is used to determine the influence of fine-scales on the
simulation and to adjust the coarse-scale computations accordingly.  Typically
an upscaled model will more accurately predict the coarse flow, and will also
correctly display fine-scale features of the flow.  With mixed variational
multiscale subgrid upscaling~\cite{Arbogast_2002_submitted,HFMQ_1998}, the
scales are split into subgrid and coarse parts before approximating, thereby
keeping all the fine-scale information in the model.  The mixed framework keeps
strict conservation of mass.

Since one goal of upscaling is to obtain a better coarse-scale approximation of
the flow, it is a natural idea to try substituting the upscaled approximation
for an ordinary coarse-scale approximation in a multigrid scheme.  The upscaling
substitution is also natural in that it is about as computationally expensive as
the ordinary coarsening.  This substitution forms the basis for one of the
proposed algorithms.

In upscaling, the flexibility of (number of degrees of freedom in) our
approximation is reduced in order to obtain a smaller or better algebraic
problem to solve.  The second proposed algorithm attempts to reintroduce the
necessary flexibility into the upscaled model to be able to capture the
fine-scale solution.  A sequence of upscaled problems is solved in which the
problem structure (not the problem data) is gradually evolved towards a problem
which has a solution which is functionally equivalent to the fine-scale
solution.  This evolution uses nonlinear optimization techniques which have the
potential of superlinear convergence.  Each step in the optimization only
requires solving an ``upscaled'' problem and determining its fine-scale
residual.

\subsection{Summary}

The emphasis in the proposed research will be to provide new tools for improving
the speed and accuracy of numerical simulation of systems exhibiting significant
heterogeneity on a fine spatial scale, rather than developing new theories of
transport.  This research will emphasize the use of mixed variational multiscale
subgrid upscaling to reduce the time necessary to accurately model these flows.
Each of the proposed methods attempts to solve a smaller system (with much
better conditioning and a special algebraic structure) to obtain an
approximation to the full system.

A description of mixed variational multiscale subgrid upscaling is included in
Section~\ref{sec:subgrid_upscaling} in order to introduce some terminology and
notation, and to remind the reader of how this method works.  The next two
sections describe the new preconditioners: a two-level iterative method that
uses smoothing and subgrid upscaling to solve fine-scale, heterogeneous problems
in Section~\ref{sec:twolevel}; and a nonlinear optimization scheme for solving
problems in Section~\ref{sec:nonlinear}.  Some of the modeling applications of
this work are set out in Section~\ref{sec:applications}, and the software that
has been and will be implemented to carry out simulations using the ideas in
this proposal is detailed in Section~\ref{sec:software}.

%----------------------------------------------
\section{Description of Mixed Variational Multiscale Subgrid Upscaling}
\label{sec:subgrid_upscaling}
%----------------------------------------------

Because of the complexity of the proposed algorithms, it is useful to first
describe the underlying method and introduce some terminology and notation.

Use $\Omega$ to denote the spatial extent of the porous medium and assume
that it is a connected, convex, polygonal domain in $\mathbb{R}^n$ (where $n$ is
1, 2, or~3).  Let $p$ and $\mathbf{\bar{u}}$ denote the unknown pressure and
macroscopic velocity.  Our model problem in the interior of the domain consists
of Darcy's Law
\begin{equation}
\mathbf{\bar{u}} =
-\frac{\mathbf{\kappa}}{\mu} \left(\mathbf{\nabla} p - \rho \mathbf{g}\right),
\label{eqn:darcy}
\end{equation}
and the continuity equation
\begin{equation}
\divdot \mathbf{\bar{u}} = f,
\label{eqn:continuity}
\end{equation}
where $\kappa$, $\mu$, $\rho$, and $\mathbf{g}$ are the (spatially-varying)
permeability, viscosity, density, and gravity, and $f$ is a source term.  On one
piece of the boundary $\Gamma_N$, specify the flow
\begin{equation}
\mathbf{u} \cdot \nu = g_N,
\label{eqn:neumann_bc}
\end{equation}
and on another, $\Gamma_D$, specify the pressure
\begin{equation}
p = g_D.
\label{eqn:dirichlet_bc}
\end{equation}
The pieces of the boundary add up to the whole
$\partial\Omega=\bar{\Gamma}_N\cup\bar{\Gamma}_D$, the pieces are disjoint
$\Gamma_N\cap\Gamma_D=\emptyset$, and $\nu$ is the unit outer normal vector to
$\partial\Omega$.  A cell-centered finite difference method (CCFD) is used to
approximate flow equations for the pressure.

The proposed methods, however, apply to the
problem~\eqref{eqn:darcy}--\eqref{eqn:dirichlet_bc} in mixed variational form.  Let
$$\mathbf{V} = \left\{ \mathbf{v} \in H(\diver,\Omega) \; | \;
\mathbf{v} \cdot \nu = 0 \;\mbox{on}\; \Gamma_N \right\},$$
$W = L^2(\Omega)$, and $\vgn$ be an element of $H(\diver,\Omega)$ such that
$\vgn \cdot \nu = g_N$ on $\Gamma_N$ and $\vgn \cdot \nu = 0$ on $\Gamma_D$.
(An important special case occurs if $\Gamma_N=\partial\Omega$.  In that case,
use $W = L^2 / \mathbb{R}$ instead; a compatibility condition on $f$ and $g_N$
is also required.)  The problem is then to find $\mathbf{u} \in \mathbf{V}$ and
$p \in W$ such that
\begin{align}
(\divdot \mathbf{u},w) &= (b,w)  &  \forall w \in W,
\label{eqn:continuity_var} \\
(\mathbf{k} \mathbf{u}, \mathbf{v}) - (p,\divdot \mathbf{v}) &=
(\mathbf{c}, \mathbf{v}) - (g_D,\mathbf{v}\cdot \nu)_{\Gamma_D}  &
\forall \mathbf{v} \in \mathbf{V}.
\label{eqn:darcy_var}
\end{align}
The substitutions $\mathbf{k} = \mu \mathbf{\kappa}^{-1}$, $b=f-\divdot\vgn$,
and $\mathbf{c}=\rho\mathbf{g}-\mathbf{k}\vgn$ have been made to simplify the
notation.  Note that the velocity $\mathbf{\bar{u}}=\mathbf{u}+\vgn$ is the
solution to the original problem~\eqref{eqn:darcy}--\eqref{eqn:dirichlet_bc}.

In order for either formulation of the problem to have a solution, the data
needs to meet some regularity constraints.  In the variational formulation, one
of interest in this proposal is that $\mathbf{k}$ must be a symmetric rank-two
tensor in $\left(L^\infty(\Omega)\right)^{n\times n}$ (denote its essential
supremum by $k^*$), and must be uniformly elliptic (denote its essestial infimum
by $k_*$).  The ratio of these two values $k^*/k_*$ will have an impact on the
performance of the proposed preconditioners.

%In the (common) special case that $\Gamma_N=\partial\Omega$, compat cond

\subsection{A mixed finite element approximation}

For a fine-scale problem, assume the permeability data $\mathbf{k}$ is given as
piecewise constant on a grid with spacing $h$, and that measurement
uncertainties prevent specifying it at any finer resolution.  It then makes
sense to find solutions at the same resolution, and no finer.  That is, find
$\mathbf{u}_h \in \Vh \subset \mathbf{V}$ and $p_h \in \Wh \subset W$ such that
\begin{align}
(\divdot \mathbf{u}_h,w_h) &= (b,w_h)  &  \forall w_h \in \Wh, \\
(\mathbf{k} \mathbf{u}_h, \mathbf{v}_h) - (p_h,\divdot \mathbf{v}_h) &=
(\mathbf{c}, \mathbf{v}_h) - (g_D,\mathbf{v}_h\cdot \nu)_{\Gamma_D}  &
\forall \mathbf{v}_h \in \Vh,
\end{align}
where $\Wh \times \Vh$ are Raviart-Thomas-Nedelec elements of order zero
(RT0)~\cite{Raviart_Thomas_1977,Brezzi_Fortin_1991,Roberts_Thomas_1991} on the
given grid.  See Figure~\ref{fig:fine_basis} for an illustration of the degrees
of freedom of these elements.


\begin{figure}[htbp]
\begin{center}
{\setlength\unitlength{4pt}
\begin{picture}(72,48)(0,0)
\multiput(0,0)(0,4){13}{ \line(1,0){72} }
\multiput(0,0)(6,0){13}{ \line(0,1){48} }
\textcolor{seafoamgreen}{
%  \multiput(3,0)(6,0){12}{\multiput(0,2)(0,4){12}{\makebox(0,0){\circle*{1}}}}
%  \multiput(3,0)(6,0){12}{\multiput(0,2)(0,4){12}{\circle*{1}}}
%  \multiput(6,0)(6,0){12}{\multiput(0,2)(0,4){12}{\circle*{1}}}
  \multiput(4.25,0)(6,0){12}{\multiput(0,2)(0,4){12}{\circle*{1}}}
}
\textcolor{orangered}{
  \multiput(0,0)(6,0){13}{\multiput(0,2)(0,4){12}{\makebox(0,0){$\times$}}}
  \multiput(0,0)(0,4){13}{\multiput(3,0)(6,0){12}{\makebox(0,0){$\times$}}}
}
\end{picture}}
\end{center}
\caption[Degrees of freedom on a fine grid.]{Degrees of freedom for
  2-D RT0 elements on a fine rectangular grid: \textcolor{orangered}{$\times$}
  normal velocity degrees of freedom, and \textcolor{seafoamgreen}{$\bullet$}
  pressure degrees of freedom.}
\label{fig:fine_basis}
\end{figure}


Using quadrature --- the trapezoidal rule --- to compute the term
$(\mathbf{k} \mathbf{u}_h, \mathbf{v}_h)$ is equivalent to solving the
original problem~\eqref{eqn:continuity_var}--\eqref{eqn:darcy_var} using
cell-centered finite differences~\cite{Russell_Wheeler_1983,AWY_1997}.

If a coarse-scale approximation is desired instead, then find
$\mathbf{u}_H \in \VH \subset \mathbf{V}$ and $p_H \in \WH \subset W$ such that
\begin{align}
(\divdot \mathbf{u}_H,w_H) &= (b,w_H)  &  \forall w_H \in \WH, \\
(\mathbf{k} \mathbf{u}_H, \mathbf{v}_H) - (p_H,\divdot \mathbf{v}_H) &=
(\mathbf{c}, \mathbf{v}_H) - (g_D,\mathbf{v}_H\cdot \nu)_{\Gamma_D}  &
\forall \mathbf{v}_H \in \VH.
\end{align}
where $\WH \times \VH$ are RT0 elements or, perhaps,
Brezzi-Douglas-Dur\`{a}n-Fortin order one (BDDF1)~\cite{BDDF_1987} elements) on
a grid with spacing $H$, an integer multiple of $h$.  See
Figure~\ref{fig:coarse_basis} for an illustration.


\begin{figure}[htbp]
\begin{center}
{\setlength\unitlength{4pt}
\begin{picture}(72,48)(0,0)
\multiput(0,0)(0,16){4}{ \line(1,0){72} }
\multiput(0,0)(24,0){4}{ \line(0,1){48} }
\textcolor{seafoamgreen}{
  \multiput(13.5,0)(24,0){3}{\multiput(0,8)(0,16){3}{\makebox(0,0){\circle*{1}}}}
}
\textcolor{orangered}{
%  \multiput(0,0)(24,0){4}{\multiput(0,8)(0,16){3}{\makebox(0,0){$\otimes$}}}
%  \multiput(0,0)(0,16){4}{\multiput(12,0)(24,0){3}{\makebox(0,0){$\otimes$}}}
  \multiput(0,0)(24,0){4}{\multiput(0,4)(0,16){3}{\makebox(0,0){$\otimes$}}}
  \multiput(0,0)(24,0){4}{\multiput(0,12)(0,16){3}{\makebox(0,0){$\otimes$}}}
  \multiput(0,0)(0,16){4}{\multiput(6,0)(24,0){3}{\makebox(0,0){$\otimes$}}}
  \multiput(0,0)(0,16){4}{\multiput(18,0)(24,0){3}{\makebox(0,0){$\otimes$}}}
}
\end{picture}}
\end{center}
\caption[Degrees of freedom on a coarse grid.]{Degrees of freedom
  for 2-D BDM1~\cite{BDM_1985} elements on a coarse rectangular grid:
  \textcolor{orangered}{$\otimes$} (linear) normal velocity degrees of freedom,
  and \textcolor{seafoamgreen}{$\bullet$} pressure degrees of freedom.}
\label{fig:coarse_basis}
\end{figure}


%With the RT0 elements, this is equivalent to using a $\mathbf{\kappa}$ which is
%piecewise constant on the $H$ grid, and is the harmonic average of the
%fine-scale $\mathbf{\kappa}$.  (NB: $\mathbf{\kappa}$ \emph{not} $\mathbf{k}$.)

For the RT0 elements, some error
estimates~\cite{Brezzi_Fortin_1991,Douglas_Roberts_1985,Roberts_Thomas_1991} for
a grid size $h$ are
\begin{align}
\norm{\mathbf{u} - \uh}_0 &\le C\norm{\mathbf{u}}_1 h &&= O(h),
\label{eqn:rt0_velocity_error} \\
\norm{p - \ph}_0 &\le C\norm{p}_2 h &&= O(h).
\label{eqn:rt0_pressure_error} \\
\intertext{For BDM1/BDDF1 elements the velocity is an order of $h$ more accurate}
\norm{\mathbf{u} - \uh}_0 &\le C\norm{\mathbf{u}}_2 h^2 &&= O(h^2).
\label{eqn:bddf1_velocity_error}
\end{align}
%\left(\norm{\mathbf{u}}_2+\norm{\mathbf{u}\cdot\nu}_{2,\Gamma_D}\right)

\subsection{The upscaling technique}

The subgrid upscaling technique uses the mixed finite element method (MFEM)
with a variational multiscale~\cite{HFMQ_1998} technique.

Before any approximation, decompose $\mathbf{V}$ and $W$ (using a chosen
coarse grid) so that
\begin{enumerate}
\item[(i)] no information is lost, $\mathbf{V} = \Vc \oplus \dlV$,
  $W = \Wc \oplus \dlW$;
\item[(ii)] mass is conserved, $\divdot \Vc = \Wc$, $\divdot \dlV = \dlW$; and
\item[(iii)] the fine-scale velocities are locally supported over the coarse grid, \\
  \mbox{$\dlV \cdot \nu = 0$} on the boundary of each coarse cell.
\end{enumerate}
(Section~3.1 --- in particular Theorem~3.3 --- of~\cite{Arbogast_2002_submitted}
show that this decomposition is always possible and satisfies some additional
necessary properties.  There is also more than one way to choose the
decomposition.)

Note that Condition (iii) above allows us to disconnect the subgrid problems
from one another, but not from the coarse grid problem.  A Green's or influence
function approach is employed to effect a forward elimination of the subgrid
problems into the coarse problem, and --- once the coarse problem has been
solved --- a back substitution to recover the subgrid solutions.  The two can
then be recombined to obtain a fine scale solution.

Since $\mathbf{V}$ and $W$ have been decomposed into direct sums, the
problem~\eqref{eqn:continuity_var}--\eqref{eqn:darcy_var} can be described as
follows.  Find $\uc \in \Vc$ and $\pc \in \Wc$, and $\dlu \in \dlV$ and
$\dlp \in \dlW$ such that on the coarse scale
\begin{align}
(\divdot (\uc + \dlu), \wc) &= (b,\wc)  &  \forall \wc \in \Wc,
\label{eqn:continuity_coarse} \\
(\mathbf{k} (\uc + \dlu), \vc) - (\pc+\dlp,\divdot \vc) &
= (\mathbf{c}, \vc) - (g_D,\vc\cdot \nu)_{\Gamma_D}  &  \forall \vc \in \Vc,
\label{eqn:darcy_coarse}
\end{align}
and on the subgrid scale
\begin{align}
(\divdot (\uc + \dlu), \dlw) &= (b, \dlw)  &  \forall \dlw \in \dlW,
\label{eqn:continuity_subgrid} \\
(\mathbf{k} (\uc + \dlu), \dlv) - (\pc+\dlp,\divdot \dlv) &
= (\mathbf{c}, \dlv)  &  \forall \dlv \in \dlV.
\label{eqn:darcy_subgrid}
\end{align}
Then $\mathbf{u} = \uc + \dlu$ and $p = \pc + \dlp$ solve the
original variational problem~\eqref{eqn:continuity_var}--\eqref{eqn:darcy_var}.

To define the $\delta$-subgrid operator and perform the forward elimination of
the coarse components, rewrite the subgrid scale equations with coarse
components on the right-hand-side.  That is, consider the coarse components as
sums of basis elements
\begin{equation}
\pc = \sum_i \alpha_i \wc^i  \qquad\text{and}\qquad  \uc = \sum_j \beta_j \vc^j.
\end{equation}
The parts of the $\delta$-operator are obtained by substituting these
expressions in~\eqref{eqn:continuity_subgrid}
and~\eqref{eqn:darcy_subgrid} and solving for the $\alpha_i$- and
$\beta_j$-influence function coefficients.

The constant part of the $\delta$ operator is given by solving
\begin{align}
(\divdot \dlubar, \dlw) &= (b, \dlw)  &  \forall \dlw \in \dlW, \\
(\mathbf{k} \dlubar, \dlv) - (\dlpbar,\divdot \dlv) &
= (\mathbf{c}, \dlv)  &  \forall \dlv \in \dlV. \\
\intertext{The $\Wc$-linear part of the $\delta$ operator is given by solving}
(\divdot \dlutwidi, \dlw) &= 0  &  \forall \dlw \in \dlW, \\
(\mathbf{k} \dlutwidi, \dlv) - (\dlptwidi,\divdot \dlv) &
= (\wc^i,\divdot \dlv)  &  \forall \dlv \in \dlV. \\
\intertext{The $\Vc$-linear part of the $\delta$ operator is given by solving}
(\divdot \dluhatj, \dlw) &= -(\divdot \vc^j,\dlw)  &  \forall \dlw \in \dlW, \\
(\mathbf{k} \dluhatj, \dlv) - (\dlphatj,\divdot \dlv) &
= -(\mathbf{k} \vc^j, \dlv)  &  \forall \dlv \in \dlV.
\end{align}
Then
\begin{alignat}{3}
\dlp &= \sum \alpha_i \dlptwidi &&+ \sum \beta_j \dlphatj &&+ \dlpbar  \nonumber\\
&\equiv \dlptwid(\pc) &&+ \dlphat(\uc) &&+ \dlpbar
\end{alignat}
and
\begin{alignat}{3}
\dlu &= \sum \alpha_i \dlutwidi &&+ \sum \beta_j \dluhatj &&+ \dlubar  \nonumber\\
&\equiv \dlutwid(\pc) &&+ \dluhat(\uc) &&+ \dlubar
\end{alignat}
are implicit expressions for $\dlp$ and $\dlu$ in terms of $\pc$ and $\uc$.
Note that $\dlptwid(\wc^i)=\dlptwidi$, $\dlphat(\vc^j)=\dlphatj$,
$\dlutwid(\wc^i)=\dlutwidi$, and $\dluhat(\vc^j)=\dluhatj$.

Return to the coarse scale
equations~\eqref{eqn:continuity_coarse}--\eqref{eqn:darcy_coarse}.  Rewrite them
substituting in the influence-function expressions for $\dlp$ and $\dlu$, and
gathering the coarse coefficients $\alpha_i$ and $\beta_j$ out front.  Equations
for the coarse components' coefficients result
\begin{align}
\sum_i \alpha_i (\divdot \dlutwidi, \wc) +
\sum_j \beta_j (\divdot (\vc^j + \dluhatj), \wc) &
= (b - \divdot \dlubar,\wc)  \nonumber\\
& \mspace{84mu} \forall \wc \in \Wc, \\
\sum_i \alpha_i (\mathbf{k} \dlutwidi, \vc) +
\sum_j \beta_j (\mathbf{k} (\vc^j + \dluhatj), \vc) \mspace{54mu}  & \nonumber\\
- \sum_i \alpha_i (\wc^i+\dlptwidi,\divdot \vc) -
\sum_j \beta_j (\dlphatj,\divdot \vc)
&= (\mathbf{c} - \mathbf{k} \dlubar, \vc) +
(\dlpbar,\divdot \vc)  \nonumber\\
& \mspace{84mu} \forall \vc \in \Vc,
\end{align}
or using the operator notation
\begin{align}
(\divdot (\uc + \dlutwid(\pc) + \dluhat(\uc)), \wc) &
= (b - \divdot \dlubar,\wc)  &
\forall \wc \in \Wc,
\label{eqn:continuity_coarse_up} \\
(\mathbf{k} (\uc + \dlutwid(\pc) + \dluhat(\uc)), \vc) \mspace{54mu}  && \nonumber\\
- (\pc+\dlptwid(\pc)+\dlphat(\uc),\divdot \vc) &
= (\mathbf{c}-\mathbf{k}\dlubar, \vc) + (\dlpbar,\divdot\vc)  & \nonumber\\
& \qquad - (g_D,\vc\cdot \nu)_{\Gamma_D}  &
\forall \vc \in \Vc,
\label{eqn:darcy_coarse_up}
\end{align}

Solve these equations for $\alpha_i$ and $\beta_j$ (that is, $\uc$ and $\pc$),
and write the solution as
\begin{alignat}{2}
p &= \pc &&+ \dlp  \nonumber\\
&= \sum_i \alpha_i \wc^i &&+ \sum_i \alpha_i \dlptwidi +
\sum_j \beta_j \dlphatj + \dlpbar \\
\intertext{and}
\mathbf{u} &= \uc &&+ \dlu  \nonumber\\
&= \sum_j \beta_j \vc^j &&+ \sum_i \alpha_i \dlutwidi +
\sum_j \beta_j \dluhatj + \dlubar.
\end{alignat}

Lastly,~\eqref{eqn:continuity_coarse_up}--\eqref{eqn:darcy_coarse_up}
can be written in symmetric form by rearranging terms and using some identities
not identified here.  See~\cite{Arbogast_2002_submitted} for details, as well as
proofs that each of the above problems are well-posed.

Also note that the $\delta$-subgrid operator has a natural definition.  No
information has been lost in the reformulation of the problem, and no ad-hoc
assumptions have been reintroduced to simplify the action of the $\delta$
operator.

\subsection{Approximating the upscaled problem}

Only after deciding on the decomposition is the approximation then made.  We are
free to pick discretizations for each subgrid independent of each of the others.
The discretization of the coarse spaces is only constrained by the already
chosen coarse grid.  Standard mixed finite element spaces such as Raviart-Thomas
or BDDF elements will do.

The subgrid upscaling technique has a number of advantages.  Each subgrid
problem is independent from the others because of the Neumann boundary
conditions along coarse cell edges; the subgrid problems can be solved in
parallel with no communication of boundary data needed across coarse cell edges.
Each subgrid problem also has many fewer degrees of freedom than the whole
problem (by about a factor of the number of coarse grid cells); these problems
are far better conditioned, and may be amenable to a direct solver.  Each
subgrid problem has the same left-hand side and many different right-hand sides
creating an opportunity for computational efficiency.  The coarse problem has
many fewer degrees of freedom than the whole problem (by about a factor of the
number of fine cells chosen per coarse cell) and so is also much better
conditioned.  If the coarse grid spacing is a small multiple of the fine grid
spacing, the work in solving the upscaled problem is nearly all done in solving
the coarse problem.  Lastly, there is only one coarse grid (not many as in
multigrid).

Further, the number of degrees of freedom in the upscaling space is nearly as
many as that in the fine space (compare Figure~\ref{fig:fine_basis} with
Figure~\ref{fig:upscale_basis} below).  However, for small upscaling factors the
upscaled solution is only about as computationally expensive as a coarse solution.
%  FIXME: operation counts.
This would be only mildly interesting if the upscaling solution was only as
accurate as a coarse solution, but in fact it captures much more (see example
below in Subsection~\ref{subsec:upscaling_practical_results}; also see
Figures~\ref{fig:twolevel_perm}--\ref{fig:brent_varyHh} in
Section~\ref{sec:twolevel} on the multigrid-like preconditioner).

There are some algorithm implementation issues to be considered.  First, the
elements of $\dlWh$ may not be easy to compute with.  For instance, suppose RT0
elements are used for $\Wh$ and $\WH$, and $\dlWh=\left(\WH\right)^\perp$ with
the complement taken in $\Wh$.  Then elements of $\dlWh$ do not have local
support on fine cells --- they must have zero average on coarse cells.  A
computational trick explained in~\cite{Arbogast_2002}
and~\cite{Arbogast_Bryant_2002} avoids this complication.

Second, there is obvious parallelism in the subgrid problems since each one is
independent of all the others, and the coarse-scale influence functions allow
disconnecting the subgrid from the coarse scale.  However, for coarse-scale
problem there is not any obvious parallelism.  To overcome this lack of
parallelism, one could use a domain decomposition
method~\cite{Glowinski_Wheeler_1988} or another technique.

Lastly, all of the problems being considered are saddle point problems because
of the mixed formulation.  For small upscaling factors, the subgrid problems may
be solved directly.  In other cases and for the coarse problem, iterative
solvers may be necessary.  The Uzawa
algorithm~\cite{Brezzi_Fortin_1991,Braess_2001} may be used to solve the
indefinite system.  Also, the hybrid mixed
method~\cite{Arnold_Brezzi_1985,Brezzi_Fortin_1991} or a Schur complement could
be used to transform the indefinite problem into a positive definite one.

\subsection{Sample approximation of the upscaled problem}

As an example, use RT0 elements to approximate $\dlW$ and $\dlV$, denoted
$\dlWh$ and $\dlVh$, and BDDF1 elements for $\Wc$ and $\Vc$, denoted $\WH$ and
$\VH$.  Let $\WHh=\WH\oplus\dlWh$ and $\VHh=\VH\oplus\dlVh$.  Elements of $\WH$
are functions which are piecewise constant on coarse cells.  Elements of $\dlWh$
are functions which are piecewise constant on fine cells, but must have a zero
average over each coarse cell.  Elements of $\VH$ are vector-valued functions
where the components normal to coarse cell faces are continuous across those
faces, and vary linearly along those faces.  Lastly, elements of $\dlVh$ are
vector-valued functions where the components normal to fine cell faces are
continuous across those faces, and are constant along those faces; the normal
components across coarse cell faces are zero.  See
Figure~\ref{fig:upscale_basis} for an illustration.


%color diagram from slide 28 of Xian talk
\begin{figure}[htbp]
\begin{center}
{\setlength\unitlength{4pt}
\begin{picture}(72,48)(0,0)
\multiput(0,0)(0,16){3}{\multiput(0,0)(24,0){3}{
\textcolor{orangered}{%\linethickness{0.75pt}
\put(0, 4){\line(1,0){24}}\multiput(3, 4)(6,0){4}{\makebox(0,0){{$\times$}}}
\put(0, 8){\line(1,0){24}}\multiput(3, 8)(6,0){4}{\makebox(0,0){{$\times$}}}
\put(0,12){\line(1,0){24}}\multiput(3,12)(6,0){4}{\makebox(0,0){{$\times$}}}
\put( 6,0){\line(0,1){16}}\multiput( 6,2)(0,4){4}{\makebox(0,0){{$\times$}}}
\put(12,0){\line(0,1){16}}\multiput(12,2)(0,4){4}{\makebox(0,0){{$\times$}}}
\put(18,0){\line(0,1){16}}\multiput(18,2)(0,4){4}{\makebox(0,0){{$\times$}}}
}
\textcolor{seafoamgreen}{\multiput(2.5,0)(6,0){4}{\multiput(0,2)(0,4){4}{\makebox(0,0){\circle*{1}}}}}
\put(-1,0){
\put( 0,3){\makebox(0,0){{$\otimes$}}}\put( 0,13){\makebox(0,0){{$\otimes$}}}
\put(24,3){\makebox(0,0){{$\otimes$}}}\put(24,13){\makebox(0,0){{$\otimes$}}}
\put(5, 0){\makebox(0,0){{$\otimes$}}}\put(19.5, 0){\makebox(0,0){{$\otimes$}}}
\put(5,16){\makebox(0,0){{$\otimes$}}}\put(19.5,16){\makebox(0,0){{$\otimes$}}}
{\linethickness{1.5pt}\framebox(24,16){}}}
}}
\end{picture}}
\end{center}
\caption[Degrees of freedom on an ``upscaled'' grid.]{Degrees of
  freedom for 2-D RT0/BDDF1 elements on an ``upscaled'' rectangular grid:
  $\otimes$ coarse velocity (linear) degrees of freedom,
  \textcolor{orangered}{$\times$} subgrid velocity degrees of freedom, and
  \textcolor{seafoamgreen}{$\bullet$} pressure degrees of
  freedom.~\cite{Arbogast_2002}}
\label{fig:upscale_basis}
\end{figure}


No closure assumption is made regarding the permeability.  Accuracy is simply a
question of approximation theory --- how well does $\VHh$ approximate
$\mathbf{V}$? --- and not a question of how much the physics is changed.
Defining effective coarse permeabilities is not necessary.

The closure assumption can be said to be that all flux across a coarse element
face is due to the coarse-scale functions.  (The $\delta$-subgrid operator is
approximated using well-worn techniques; it is not altered in an ad-hoc fashion
to facilitate computation.)  In this example, to compensate for the coarseness
of the restriction of velocities along coarse edges, higher order accurate basis
functions have been employed.

For the BDDF1/RT0 elements, some error estimates are~\cite{Arbogast_2002_submitted}
\begin{alignat}{2}
\norm{\mathbf{u} - \uHh}_0 &
\le C\left\{\norm{p}_1 h^2 + \left(\norm{\mathbf{u}}_2+\norm{\mathbf{u}\cdot\nu}_{2,\Gamma_D}\right) H^2\right\} &&
= O(H^2), \\
\norm{p - \pHh}_0 &
\le C\left\{\norm{p}_1 h + \norm{\divdot\mathbf{u}}_1 h^2 +
  \left(\norm{\mathbf{u}}_2+\norm{\mathbf{u}\cdot\nu}_{2,\Gamma_D}\right) H^3\right\} &&
=O(h + H^3).
\end{alignat}
However, the simplicity of the above expressions belies some error analysis
complications; for instance, the $\pi_{H,h}$ operator for the upscaled elements
is obtained not just from gluing together the operators from the coarse elements
$\pi_H$ and each of the subgrid elements $\delta\pi_{h,E_c}$, but also solving
an auxiliary problem (except under additional assumptions).  The error estimates
above also do not capture all the subtleties of the subgrid upscaled solution
(see, for instance, the graphs of the eigenstructure of the subgrid upscaling
preconditioner in Section~\ref{sec:twolevel}).  One thing to note, though, is
that if $H/h$ is small, then $O(h+H^3)=O(h)$ for the pressure and
$O(H^2)=O((H/h)^2 h^2)$ for the velocity.  A comparison between the upscaling
error estimates with those for BDM1/BDDF1 elements on fine
grid,~\eqref{eqn:rt0_pressure_error} and~\eqref{eqn:bddf1_velocity_error}, shows
that they are the same.

\subsection{Some practical results}
\label{subsec:upscaling_practical_results}

To show the ability of the subgrid upscaling to approximate problems almost as
well as a full fine-scale solution (and far better than a coarse-scale one),
some results from a simulated quarter five-spot oil reservoir water flood are
presented.  The above ideas were adapted to two-phase immiscible displacements,
and run on the following example~\cite{Arbogast_2000}.

The domain is a 40\,m by 40\,m square with a uniform, rectangular, $40\times40$
grid.  The base-10 logarithm of the permeability field is shown in
Figure~\ref{fig:brent_logperm}; the porosity is 25\%.  The field initially
has a 20\% saturation of water.  There is an injection well in the lower-left
corner, and a production well in the top-right corner.  Each has a rate of
0.2\,m$^2$/day, and water is being injected.


\begin{figure}[htbp]
\begin{center}
\includegraphics[clip=true,keepaspectratio=true,angle=-90,width=3in]{eigstruct/brent/log10permx}
%\includegraphics[clip=true,keepaspectratio=true,angle=-90,width=3in]{arbogast/brent-log10permx}
\end{center}
\caption[Sample permeability field.]{Logarithm of the permeability field (geostatistically generated).}
\label{fig:brent_logperm}
\end{figure}


\begin{figure}[htbp]
\begin{center}
\begin{tabular}{c c}
\includegraphics[clip=true,keepaspectratio=true,angle=-90,width=2in]{arbogast/s1.051.ps} &
\includegraphics[clip=true,keepaspectratio=true,angle=-90,width=2in]{arbogast/s1.124.ps} \\
\end{tabular}
\end{center}
\caption[Water saturation contours for the fine solution.]{Water saturation
  contours at 100 and 500 days for the fine $40\times40$ solution.}
\label{fig:brent_fine_saturation}
\end{figure}


\begin{figure}[htbp]
\begin{center}
\begin{tabular}{c c}
\includegraphics[clip=true,keepaspectratio=true,angle=-90,width=2in]{arbogast/s0.048.ps} &
\includegraphics[clip=true,keepaspectratio=true,angle=-90,width=2in]{arbogast/s0.092.ps} \\
\end{tabular}
\end{center}
\caption[Water saturation contours for the upscaled solution.]{Water saturation
  contours at 100 and 500 days for the upscaled $5\times5$ solution.}
\label{fig:brent_upscale_saturation}
\end{figure}


\begin{figure}[htbp]
\begin{center}
\begin{tabular}{c c}
\includegraphics[clip=true,keepaspectratio=true,angle=-90,width=2in]{arbogast/s2.048.ps} &
\includegraphics[clip=true,keepaspectratio=true,angle=-90,width=2in]{arbogast/s2.092.ps} \\
\end{tabular}
\end{center}
\caption[Water saturation contours for the coarse solution.]{Water saturation
  contours at 100 and 500 days for the coarse $5\times5$ solution.}
\label{fig:brent_coarse_saturation}
\end{figure}


Figures~\ref{fig:brent_fine_saturation},~\ref{fig:brent_upscale_saturation},
and~\ref{fig:brent_coarse_saturation} speak for themselves.  The upscaled and
fine-scale solutions are certainly close in the eyeball norm; the coarsened
solution is not close to either and fails qualitatively to capture some
behavior.
%The number of degrees of freedom used in each simulation and the time
%taken to compute each solution are shown in Table~\ref{tbl:brent_compute_data}.
The upscaled solution requires only about as much time to compute as the coarse
solution, but has nearly as many degrees of freedom as the fine solution; hence
the ``success.''
%A graph comparing the computed production histories for the
%top-right well is shown in Figure~\ref{fig:brent_production}.


%\begin{table}[htbp]
%\begin{center}
%\begin{tabular}{l r r r}
% & Pressure DOFs & Velocity DOFs & Time \\
%Coarse & 25 & 80 & ??? \\
%Upscale & 1600 & 2880 & ??? \\
%Fine & 1600 & 6240 & ??? \\
%\end{tabular}
%\end{center}
%\caption{Degrees of freedom (DOFs) for each simulation, and time taken to
%  compute each time step for the solution.}
%\label{tbl:brent_compute_data}
%\end{table}
%
%If the above table gets put back in, add \listoftables back too
%
%
%\begin{figure}[htbp]
%\begin{center}
%\includegraphics[keepaspectratio=true,width=2in]{../2002-ia/bblsOil.eps} \\
%\end{center}
%\caption{Production history of top-right well.}
%\label{fig:brent_production}
%\end{figure}
%
%
%Oops, the timing data and production histories I have are from another example.
%Clip it?  (Yes.)

Throughout most of the rest of the paper, only RT0/RT0 elements will be used.
Although the ideas are applicable to others, the description, analysis, and
implementation of higher order combinations gets much more complicated.

\subsection{Other upscaling techniques}

Upscaling techniques are numerous; surveys of various ones currently under study
can be found
in~\cite{Durlofsky_2002,Farmer_2002,Renard_deMarsily_1997,Wen_GomezHernandez_1996}.
A few descriptions follow.  Chen et al.~\cite{CDGW_2003} describe their
technique as a local/global one: use a global coarse simulation to determine
appropriate boundary conditions for local simulations that then determine an
effective coarse permeability.  Other techniques are local; for instance,
periodic boundary conditions can be imposed on the subgrids when computing
effective permeabilities as in~\cite{BLP_1978,Sanchez-Palencia_1980}.
Oversampling is another possibility where a local problem (with various boundary
conditions) that is slightly larger than a subgrid is used to determine the
effective permeability for that subgrid; Chen and Hou~\cite{Chen_Hou_2002} and
Hou and Wu~\cite{Hou_Wu_1997} use a multiscale finite element technique to
implement this idea.  Wu, Efendiev, and Hou also use this idea~\cite{EHW_2000},
and in~\cite{WEH_2002} apply a flow-based gridding dynamic approach to limit the
necessary upscaling.  Holden and Nielsen~\cite{Holden_Nielsen_2000} have
developed a global upscaling method that uses a least-squares problem to
determine effective coarse permeabilities.
%Other upscaling techniques have been developed to specifically treat near-well situations~\cite{UNK}.

%``effective'' equals ``equivalent'' equals ``pseudo'' in reference to coarse permeabilities?
%FIXME.  durlofsky, chen, hou, wu, efendiev, jenny, he.

Some distinguishing features of the upscaling technique used here are the use of
mixed methods, the lack of ad-hoc assumptions about the $\delta$-subgrid
operator, and the lack of computed effective coarse permeabilities.

%----------------------------------------------
\section{A Two-level Multigrid-like Scheme}
\label{sec:twolevel}
%----------------------------------------------

In traditional multigrid, one pairs smoothings on fine and coarse grids together
with the intent of each smoother reducing the error in different parts of the
spectrum.  The coarse-grid smoother reduces the error on large scales, and the
fine-grid smoother reduces it on short scales~\cite{BHM_2000}.

However, fine-scale details of the problem have an effect on coarse-scale flow
behavior.  Thus, instead of a simple coarse-grid corrector in multigrid, a
corrector that better approximates the true coarse-scale behavior could be used.
Since a corrector that is much more computationally expensive than the simple
coarse one is not a reasonable choice, it is natural to try an upscaled coarse
corrector.  This is the basis for the first proposed algorithm.  A schematic is
shown in Figure~\ref{fig:twolevel_schematic}.


\begin{figure}[htbp]
\begin{center}
{\setlength\unitlength{6pt}
\begin{picture}(60,30)(0,0)

\put(0,10){\framebox(12,10){\shortstack[l]{
\textcolor{bo}{\small START \strut} \\
\textcolor{greyblue}{\footnotesize $i=0$ \strut} \\
\textcolor{greyblue}{\footnotesize $\mathbf{x}_0 = \mathbf{0}$ \strut} \\
\textcolor{greyblue}{\footnotesize $\mathbf{r}_0 = \mathbf{b} - A \mathbf{x}_0$ \strut}
}}}

\put(16,0){\framebox(12,12){\shortstack[l]{
\textcolor{bo}{\small UPSCALE \strut} \\
\textcolor{greyblue}{\footnotesize $\mathbf{\tilde{c}}_i = \tilde{A}^{-1} R \mathbf{r}_i$ \strut} \\
\textcolor{greyblue}{\footnotesize $\mathbf{x}_{i+1} = \mathbf{x}_i + P \mathbf{\tilde{c}}_i$ \strut} \\
\textcolor{greyblue}{\footnotesize $i \gets i+1$ \strut} \\
\textcolor{greyblue}{\footnotesize $\mathbf{r}_i = \mathbf{b} - A \mathbf{x}_i$ \strut}
}}}

\put(16,18){\framebox(12,12){\shortstack[l]{
\textcolor{bo}{\small SMOOTH \strut} \\
\textcolor{greyblue}{\footnotesize $\mathbf{c}_i = \alpha B_{\mathrm{J}}^{-1} \mathbf{r}_i$ \strut} \\
\textcolor{greyblue}{\footnotesize $\mathbf{x}_{i+1} = \mathbf{x}_i + \mathbf{c}_i$ \strut} \\
\textcolor{greyblue}{\footnotesize $i \gets i+1$ \strut} \\
\textcolor{greyblue}{\footnotesize $\mathbf{r}_i = \mathbf{b} - A \mathbf{x}_i$ \strut}
}}}

\put(32,12){\framebox(12,6){\shortstack[l]{
\textcolor{bo}{\small DONE? \strut} \\
\textcolor{greyblue}{\footnotesize Is $\norm{\mathbf{r}_i}$ small? \strut}
}}}

\put(48,12){\framebox(12,6){\shortstack[l]{
\textcolor{bo}{\small FINISH \strut} \\
\textcolor{greyblue}{\footnotesize $\mathbf{x} \approx \mathbf{x}_i$ \strut}
}}}

%{\linethickness{2pt}
%\put(12,15){\vector(4,8){8.9442719}}
%\put(28,23){\vector(4,-6){7.2111026}}
%\put(32,13){\vector(-4,-8){8.9442719}}
%\put(22,12){\vector(0,6){6}}
%\put(44,15){\vector(4,0){4}}
\put(12,15){\vector(1,2){4}}
%\put(28,7){\vector(2,3){4}}
\put(32,13){\vector(-2,-3){4}}
%\put(32,17){\vector(-1,2){4}}
\put(28,25){\vector(1,-2){4}}
\put(22,12){\vector(0,1){6}}
\put(44,15){\vector(1,0){4}}
%}

\put(30,9){ \textcolor{seafoamgreen}{\footnotesize NO} }
\put(44,16){ \textcolor{seafoamgreen}{\footnotesize YES} }

\end{picture}}
\end{center}
\caption[Schematic of a two-level scheme.]{Schematic of a two-level scheme to
  solve the linear system $A \mathbf{x} = \mathbf{b}$.  The matrix $A$ describes
  the interactions among cells from a CCFD discretization of Darcy's Law and the
  continuity equation.  The pressures $\mathbf{x}$ are the unknowns, and the
  information from sources, boundary terms, and gravity $\mathbf{b}$ forms the
  right-hand side.
  There is also the matrix $\tilde{A}$ for the corresponding upscaling MFEM
  problem, the $i$th guess at pressure unknowns $\mathbf{x}_i$, the $i$th-stage
  residual $\mathbf{r}_i = \mathbf{b} - A \mathbf{x}_i$, the $i$th-stage
  smoothing corrector $\mathbf{c}_i$, and $i$th-stage upscaling corrector
  $\mathbf{\tilde{c}}_i$ (pressure and velocity unknowns).
  Lastly, there is $B_{\mathrm{J}}$ from the diagonal elements of $A$ used in
  Jacobi smoothing step, the restriction operator $R: \Wh \rightarrow \WHh
  \times \mathbf{0}$, and the prolongation operator $P: \WHh \times \VHh
  \rightarrow \Wh$.
  Also note that usually many applications of the smoother may occur in between
  upscaling steps.
}\label{fig:twolevel_schematic}
\end{figure}


The intuition as to why multigrid works so well is that smoothing the solution
iterates on different levels reduces the error in different parts of the
spectrum.  Towards that end, we have developed code to compute the eigenvectors
(and corresponding eigenvalues) for smoothers at each level --- the fine-scale
level (with ordinary Jacobi smoothing) and the upscale smoothing --- and to plot
the norm of the gradient of the eigenvectors (considered as functions) versus
the corresponding eigenvalues.  The matrices analyzed have the form $I - M\,A$;
this is from the error update equation $\mathbf{e}_{i+1} = (I-M\,A)\mathbf{e}_i$
that describes the action of the preconditioner $M$ on the error $\mathbf{e}$.

The results of applying the code to several two dimensional examples can be seen
in Figures~\ref{fig:twolevel_perm}--\ref{fig:brent_varyHh}.  Each of the
examples has the unit square as the domain with uniform porosity and Dirichlet
boundary conditions.  The first example has a very simple permeability: it takes
one value on the left half of the domain, and another value on the right half
(shown in Figure~\ref{fig:twolevel_perm}).


\begin{figure}[htbp]
\begin{center}
\includegraphics[keepaspectratio=true,width=3in]{eigstruct/twolevel/simple-perm-diagram}
\end{center}
\caption{A simple permeability field.}
\label{fig:twolevel_perm}
\end{figure}


In Figure~\ref{fig:twolevel_typical} a ``typical'' result for the eigenstructure
analysis as applied to the simple two-level permeability field is shown.  On the
horizontal axis is measured the norm of the gradient of the eigenvector when
interpreted as a 2-D pressure field (this measure is a stand-in for frequency).
The vertical axis shows on a
logarithmic scale the absolute value of the corresponding eigenvalue.  The
blue dots show the check-mark-like structure for a weighted Jacobi smoother.
This picture is similar to ones usually used in multigrid
analysis~\cite{BHM_2000}.  The cyan dots show the structure one obtains
for the corresponding weighted Jacobi smoother on the coarse level.  It has the
same shape, but the small-eigenvalue dip comes at lower frequencies (coarser
modes) as expected.  The magenta dots show the structure for the
preconditioner that exactly solves the coarse problem.  It has eigenvalues
greater than one in the coarser modes because the upscaling factor
$H/h$ is equal to three, and not two (the more usual result with eigenvalues
bounded by one can be seen in the bottom right of
Figure~\ref{fig:twolevel_varyHh}).  Lastly, the red dots show the structure for
the upscaling preconditioner.  Some notable features are its complicated
organization, that coarse modes (over a large range) have quite small
eigenvalues, that there is a steady rise in eigenvalues versus frequency, and
that the eigenvalues top out at greater than one.  However, this maximum occurs
near where the weighted Jacobi smoother is performing at its best.


\begin{figure}[htbp]
\begin{center}
\includegraphics[clip=true,keepaspectratio=true,width=4in]{eigstruct/twolevel/rt0/simplek-24-08-1e-1}
\end{center}
\caption[A ``typical'' eigenstructure result.]{A ``typical'' eigenstructure
  result for the smoothers and upscaler (RT0 elements used at all levels).  The
  fine grid used was $24\times24$, the coarse grid $8\times8$ (with $H/h=3$),
  and $k_2=0.1$.  The colored dots are eigenpairs for the
  \textcolor{cyan}{$\bullet$} coarse smoothing preconditioner,
  \textcolor{magenta}{$\bullet$} coarse exact preconditioner,
  \textcolor{blue}{$\bullet$} fine smoothing preconditioner, and
  \textcolor{red}{$\bullet$} subgrid upscaling preconditioner.}
\label{fig:twolevel_typical}
\end{figure}


Figure~\ref{fig:twolevel_varyh} shows the effect of varying $h$, the grid size,
while leaving the upscaling factor $H/h$ and the permeability field alone.  As
can be seen, the overall structure remains the same independent of $h$, but the
level of detail one sees in the diagrams increases, as does the scale of the
frequencies plotted.


\begin{figure}[htbp]
\begin{center}
\begin{tabular}{c c}

\multicolumn{2}{c}{Vary $h$ ; Fix $H/h=3$ ; Fix $k_2=0.1$} \\

\includegraphics[clip=true,keepaspectratio=true,width=2in]{eigstruct/twolevel/rt0/simplek-06-02-1e-1} &
\includegraphics[clip=true,keepaspectratio=true,width=2in]{eigstruct/twolevel/rt0/simplek-12-04-1e-1} \\

{\small $6\times6$} &
{\small $12\times12$} \\

%{\tiny ~} & {\tiny ~} \\

\includegraphics[clip=true,keepaspectratio=true,width=2in]{eigstruct/twolevel/rt0/simplek-24-08-1e-1} &
\includegraphics[clip=true,keepaspectratio=true,width=2in]{eigstruct/twolevel/rt0/simplek-48-16-1e-1} \\

{\small $24\times24$} &
{\small $48\times48$} \\

\end{tabular}
\end{center}
\caption[Eigenstructure results for varying fine grid sizes.]{Eigenstructure
  results for the preconditioners for problems with various fine grid sizes $h$,
  but fixed upscaling ratio $H/h$ and fixed permeability.  The colored dots are
  eigenpairs for the
  \textcolor{cyan}{$\bullet$} coarse smoothing preconditioner,
  \textcolor{magenta}{$\bullet$} coarse exact preconditioner,
  \textcolor{blue}{$\bullet$} fine smoothing preconditioner, and
  \textcolor{red}{$\bullet$} subgrid upscaling preconditioner.}
\label{fig:twolevel_varyh}
\end{figure}


Figure~\ref{fig:twolevel_varyHh} shows the effect of varying $H/h$, the
upscaling factor, while leaving the fine grid size $h$ and the permeability
field alone.  As $H/h$ increases, there is a shift to lower frequencies for the
coarse smoother and the upscaling preconditioner.  (As noted above, the
eigenvalues of coarse modes for the exact coarse preconditioner get larger with
increasing $H/h$.)  There is a general worsening of performance with increasing
$H/h$ as would be expected.


\begin{figure}[htbp]
\begin{center}
\begin{tabular}{c c}

\multicolumn{2}{c}{\large Fix $h=1/24$ ; Vary $H/h$ ; Fix $k_2=0.1$ w/RT0} \\

\includegraphics[clip=true,keepaspectratio=true,width=2in]{eigstruct/twolevel/rt0/simplek-24-04-1e-1} &
\includegraphics[clip=true,keepaspectratio=true,width=2in]{eigstruct/twolevel/rt0/simplek-24-06-1e-1} \\

{\small $H/h=6$} & {\small $H/h=4$} \\
%{\tiny ~} & {\tiny ~} \\

\includegraphics[clip=true,keepaspectratio=true,width=2in]{eigstruct/twolevel/rt0/simplek-24-08-1e-1} &
\includegraphics[clip=true,keepaspectratio=true,width=2in]{eigstruct/twolevel/rt0/simplek-24-12-1e-1} \\

{\small $H/h=3$} & {\small $H/h=2$} \\

\end{tabular}
\end{center}
\caption[Eigenstructure results for varying upscaling ratios.]{Eigenstructure
  results for the preconditioners for problems with various upscaling ratios
  $H/h$, but fixed fine grid size $h$ and fixed
  permeability.  The colored dots are eigenpairs for the
  \textcolor{cyan}{$\bullet$} coarse smoothing preconditioner,
  \textcolor{magenta}{$\bullet$} coarse exact preconditioner,
  \textcolor{blue}{$\bullet$} fine smoothing preconditioner, and
  \textcolor{red}{$\bullet$} subgrid upscaling preconditioner.}
\label{fig:twolevel_varyHh}
\end{figure}


Figure~\ref{fig:arcoperm_perm} shows the Arcoperm permeability
field~\cite{Arcoperm_1997}, and Figure~\ref{fig:brent_perm} shows the Brent
permeability field~\cite{Lindquist_1992}.  Both show considerably more variation
than the simple two-level permability field.  They both also have a greater
ratio between the greatest and least values of the permeability: the Arcoperm
field about a half an order of magnitude, and the Brent field more than four
orders of magnitude.  For the Arcoperm field, Figure~\ref{fig:arcoperm_varyHh}
shows the same rich structure as seen for the two-level permeability field, but
surprisingly the upscaling preconditioner has a similar performance: small
eigenvalues for coarse modes with a general trend towards larger (but not too
large) eigenvalues for fine modes.  At the greatest upscaling factor $H/h$ of
six, the middle modes begin to suffer, too.  The results in
Figure~\ref{fig:brent_varyHh} show the same general trends.  However, the middle
modes become much more of a problem (note the vertical scale has changed from a
high of $10^1$ before to $10^2$ now).


\begin{figure}[htbp]
\begin{center}
\includegraphics[keepaspectratio=true,width=3in]{eigstruct/arcoperm/log10perm}
\end{center}
\caption[The Arcoperm permeability field.]{The Arcoperm permeability field.  The
  minimum of the base-10 logarithm of the permeability values is -0.2916 and the
  maximum 0.1477.}
\label{fig:arcoperm_perm}
\end{figure}


\begin{figure}[htbp]
\begin{center}
\begin{tabular}{c c}

\multicolumn{2}{c}{\large Fix $h=1/40$ ; Vary $H/h$ ; Fix $k$ w/RT0} \\

\includegraphics[clip=true,keepaspectratio=true,width=2in]{eigstruct/arcoperm/rt0/arcoperm-40-20} &
\includegraphics[clip=true,keepaspectratio=true,width=2in]{eigstruct/arcoperm/rt0/arcoperm-40-08} \\

{\small $H/h=2$} & {\small $H/h=5$} \\
%{\tiny ~} & {\tiny ~} \\

\includegraphics[clip=true,keepaspectratio=true,width=2in]{eigstruct/arcoperm/rt0/arcoperm-40-05} &
\includegraphics[clip=true,keepaspectratio=true,width=2in]{eigstruct/arcoperm/rt0/arcoperm-40-02} \\

{\small $H/h=8$} & {\small $H/h=20$} \\

\end{tabular}
\end{center}
\caption[Eigenstructure results for varying upscaling ratios.]{Eigenstructure
  results for the preconditioners for problems with
  various upscaling ratios $H/h$ with the Arcoperm permeability field in
  Figure~\ref{fig:arcoperm_perm}.  The colored dots are eigenpairs for the
  \textcolor{cyan}{$\bullet$} coarse smoothing preconditioner,
  \textcolor{magenta}{$\bullet$} coarse exact preconditioner,
  \textcolor{blue}{$\bullet$} fine smoothing preconditioner, and
  \textcolor{red}{$\bullet$} subgrid upscaling preconditioner.}
\label{fig:arcoperm_varyHh}
\end{figure}


\begin{figure}[htbp]
\begin{center}
\includegraphics[clip=true,keepaspectratio=true,angle=-90,width=3in]{eigstruct/brent/log10permx}
\end{center}
\caption[The Brent permeability field.]{The Brent permeability field.  The
  minimum of the base-10 logarithm of the permeability values is -0.6909 and the
  maximum 3.5230.}
\label{fig:brent_perm}
\end{figure}


\begin{figure}[htbp]
\begin{center}
\begin{tabular}{c c}

\multicolumn{2}{c}{\large Fix $h=1/40$ ; Vary $H/h$ ; Fix $k$ w/RT0} \\

\includegraphics[clip=true,keepaspectratio=true,width=2in]{eigstruct/brent/rt0/brent-40-20} &
\includegraphics[clip=true,keepaspectratio=true,width=2in]{eigstruct/brent/rt0/brent-40-08} \\

{\small $H/h=2$} & {\small $H/h=5$} \\
%{\tiny ~} & {\tiny ~} \\

\includegraphics[clip=true,keepaspectratio=true,width=2in]{eigstruct/brent/rt0/brent-40-05} &
\includegraphics[clip=true,keepaspectratio=true,width=2in]{eigstruct/brent/rt0/brent-40-02} \\

{\small $H/h=8$} & {\small $H/h=20$} \\

\end{tabular}
\end{center}
\caption[Eigenstructure results for varying upscaling ratios.]{Eigenstructure
  results for the preconditioners for problems with
  various upscaling ratios $H/h$ with the Brent permeability field in
  Figure~\ref{fig:brent_perm}.  The colored dots are eigenpairs for the
  \textcolor{cyan}{$\bullet$} coarse smoothing preconditioner,
  \textcolor{magenta}{$\bullet$} coarse exact preconditioner,
  \textcolor{blue}{$\bullet$} fine smoothing preconditioner, and
  \textcolor{red}{$\bullet$} subgrid upscaling preconditioner.}
\label{fig:brent_varyHh}
\end{figure}


One obvious conclusion to draw from the eigenstructure diagrams is that the
performance of the upscaling level suffers in the mid-frequency eigenvectors
(see also the convergence histories below).  Perhaps Krylov space methods (a
CG-like iteration)~\cite{Bruaset_1995,Golub_VanLoan_1996,Demmel_1997}, a third
level with an upscaler based on a grid staggered relative to the first upscaler,
semi-coarsening in different levels in different directions~\cite{CWW_1992}, or
perhaps a traditional multigrid coarse smoother might be used to improve the
method.
% the (new piece of the) staggered preconditioner may have a similar
% eigenstructure diagram as for the un-staggered one, but the e-vectors are
% hopefully ``complementary'' (a coarse one decomposes into two modes of the
% other that are half-again as coarse)

The diagrams also lead to the reasonable inference that the behavior of the
proposed two-level scheme depends on $H/h$ and $k^*/k_*$, but not the absolute
level of $h$.  This mirrors the behavior of multigrid, but we have not proven
this yet for this scheme.
%???it's not really $k^*/k_*$ but the size of the jumps in k btw adjacent cells???

In this proposal, we have been assuming that a single coarsening of the grid is
enough to result in a problem that is computationally inexpensive to use.
Sometimes this ``coarse'' grid just is not coarse enough, and the associated
problem is \emph{still} too computationally expensive to work with.  In this
case, a true variational \emph{multi}-scale (and not just two-scale) method for
the upscaling level would be appropriate.  This might be accomplished say by
decomposing $\mathbf{V}=\Vc\oplus\dlV=\Vsc\oplus\DlV\oplus\dlV$ and
$W=\Wc\oplus\dlW=\Wsc\oplus\DlW\oplus\dlW$ into three levels (or more) with a
correspoding approximation with multiple levels.  The details of this
possibility have yet to be worked out.  It might also be possible to use
traditional algebraic multigrid on the coarsened system that comes from the upscaled
problem~\eqref{eqn:continuity_coarse_up}--\eqref{eqn:darcy_coarse_up}.

The eigenstructure diagrams also give us a way to estimate how often the Jacobi
smoother is applied relative to the number of times the upscaling preconditioner
is applied.  It is also possible to then estimate the overall error reduction
factor for an iteration of our two-level method.  This is accomplished by
examining frequency by frequency the product of the error reduction factors
(eigenvalues) of the smoother and upscaling preconditioner, and by computing the
maximum such product.  This allows us also to estimate the relative performance
of multigrid with the two-level upscaling/smoother method for a given problem.
Of course, this is assuming that eigenvectors with like ``frequency'' are alike;
in reality, the eigenbases differ and the comparison for a given frequency is
not apples-to-apples.

Some examples of convergence histories using this analysis are shown in
Figure~\ref{fig:conv_histories}.  The size of the residuals/errors versus
iteration number are plotted for both the two-level upscaling scheme and a
two-level multigrid scheme.  Significantly faster convergence is obtained with
the upscaling than with multigrid (although both display linear convergence and
dependence on $k^*/k_*$).  A zero initial guess for the solution is used along
with a random source term.


\begin{figure}[htbp]
\centerline{
\begin{tabular}{c c}
\multicolumn{2}{c}{
\includegraphics[keepaspectratio=true,width=3in]{eigstruct/twolevel/simple-errors}
} \\
\includegraphics[keepaspectratio=true,width=3in]{eigstruct/arcoperm/arcoperm-errors} &
\includegraphics[keepaspectratio=true,width=3in]{eigstruct/brent/brent-errors} \\
\end{tabular}}
\caption[Convergence histories of the two-level upscaling and multigrid
  schemes.]{Convergence histories of the two-level upscaling and multigrid
  schemes for the three sample permeability fields in
  Figures~\ref{fig:twolevel_perm}, \ref{fig:arcoperm_perm},
  and~\ref{fig:brent_perm}.  The light and dark blue colored dots mark the sizes
  of the residual for the multigrid and upscaling schemes, respectively.  The
  light and dark red dots mark the sizes of the error.}
\label{fig:conv_histories}
\end{figure}


The diagram in the top row shows the convergence history for the simple
permeability field (shown in Figure~\ref{fig:twolevel_perm}) on a $24\times24$
grid with $H/h=4$ and $k_2=0.1$.  An estimation from the eigenstructure diagram
in the top-right of Figure~\ref{fig:twolevel_varyHh} indicates that a single
Jacobi smoothing step pre- and post-upscaling in a V-cycle will ensure
convergence.  Indeed this is the case: the upscaling scheme has a convergence
factor of about $0.6$ per V-cycle step.  On the other hand, multigrid has a
factor of about $0.94$ for the same V-cycle with a smoother on the coarse level.

The diagram on the left in the bottom row shows the convergence history for the
Arcoperm permeability field shown in Figure~\ref{fig:arcoperm_perm} for $H/h=5$.
An estimation from the eigenstructure diagram in the top-right of
Figure~\ref{fig:arcoperm_varyHh} indicates that a single Jacobi smoothing step
pre- and post-upscaling in a V-cycle again will ensure convergence.  This is the
case again; however, this time the asymptotic convergence factor is only about
the same for the upscaling scheme as it is for multigrid.  There is, though, a
steep initial decline.  This is probably attributable to the initial guess for
the solution (all zeros) having an error that has large components in the
direction of the coarse eigenmodes of the upscaling preconditioner (this may
also explain the large jump in the first step (only) for the simple permeability
field above).  The ``stalling'' of the convergence is probably attributable to
the poor performance of the preconditioner on middle eigenmodes.

Modifying the upscaling preconditioner is one fix.  Another is simply increasing
the number of Jacobi smoothings on the fine level from one to two.  As can be
seen in the left diagram of Figure~\ref{fig:conv_hist_others}, this has a
dramatic effect.  The upscaling scheme now clearly outperforms multigrid.  It
would seem that over-estimating the number of necessary fine smoothings has a
beneficial effect (and is not very computationally expensive).


\begin{figure}[htbp]
\centerline{
\begin{tabular}{c c}
\includegraphics[keepaspectratio=true,width=3in]{eigstruct/arcoperm/arcoperm-better-errors} &
\includegraphics[keepaspectratio=true,width=3in]{eigstruct/brent/brent-mistake-errors} \\
\end{tabular}}
\caption[More convergence histories of the two-level upscaling and multigrid
  schemes.]{More convergence histories of the two-level upscaling and multigrid
  schemes that illustrate the effect of over- and under-estimating the number of
  necessary smoothings.  The coloring scheme is the same as used in
  Figure~\ref{fig:conv_histories}.  The diagram on the left is comparable to the
  one in the bottom-left of Figure~\ref{fig:conv_histories}; two smoothings per
  step are used instead of one.  The diagram on the right is comparable to the
  one in the bottom-right of Figure~\ref{fig:conv_histories}; five smoothings per
  step are used instead of forty.}
\label{fig:conv_hist_others}
\end{figure}


This may also partly explain the good performance of the upscaling scheme in the
last, bottom-right diagram of Figure~\ref{fig:conv_histories}.  This is the
convergence history of the Brent permeability field with $H/h=5$.  Estimating
the number of necessary smoothings from the top-right eigenstructure diagram of
Figure~\ref{fig:brent_varyHh} is more difficult this time (note the most
smoothings comes from the coarsest eigenmode --- the red dot furthest to the
left).  Forty smoothings were used; this is most likely an over-estimate of the
number of smoothings necessary for a convergent scheme.  Again, this seems to
have a beneficial effect for the upscaling scheme relative to multigrid.

The penalty to be paid for under-estimating the number of necessary smoothings
can be seen in the right-hand diagram of Figure~\ref{fig:conv_hist_others}.  A
divergent scheme may result.  (This is the ``convergence'' history for the Brent
field with $H/h=5$ as before, but with only five smoothings.)  One last feature to
note is that for all the convergent schemes, the error is monotonically
decreasing (as is expected for a multigrid-like scheme).

%operation counts (per level/per step) and a comparison with MG(reference).
In the future we would like to produce plots of the size of \emph{spectrum} of
the error versus iteration number.  A break-down of inter-step frequency
reduction (the spectrum before and after smoothing, and before and after
upscaling) would also be useful.  The ability to visualize a given eigenmode
(that is, make an $n$-dimensional plot of the pressure that corresponds to an
eigenvector) by clicking on the eigenstructure plot would be a handy tool.

In future research, we hope to be able to prove that our two-level scheme
converges linearly (independent of $h$, but dependent on $H/h$ and maybe
$k^*/k_*$).  Toward that end, the methods used in standard multigrid will
certainly be useful.  There are several monographs~\cite{Hackbusch_1985,
McCormick_1987, Wesseling_1992, Bramble_1993, Hackbusch_1993, Bruaset_1995,
Saad_1995, Shaidurov_1995, BHM_2000, Braess_2001} and review
articles~\cite{Xu_1992, Yserentant_1993, Xu_1997A, Xu_1997B} that provide useful
information (several of the monographs on iterative methods generally).  As
well, Bramble, Pasciak, and Xu have developed a general framework for subspace
correction methods~\cite{BPX_1991, BPWX_1991A, BPWX_1991B, Xu_1992,
Bramble_1993, Xu_1997A, Xu_1997B}.  Their work will be useful in having
developed a unified approach to multigrid-like methods, and also specifically
because they treat the possibility of non-nested spaces for corrections ($\VHh$
is not necessarily a subset of $\Vh$), and perturbed linear forms (e.g., using
quadrature on the term $(\mathbf{k} \mathbf{u}_h, \mathbf{v}_h)$).

% refer other sections on linear solver techniques to here

% other monographs?
%  P. Oswald,  Multilevel Finite Element Approximation: Theory and Applications.
%  Teubner Skripte zur Numerik, Teubner, Stuttgart, 1994 (ISBN 3-519-02719-4)
%  Hackbusch and Trottenberg. MG Methods. LNM #960. SV 1982.
% other review articles? others from briggs bib

% add ``and historical papers'' to list above
% historical: (Bank_Dupont_1981 is only one in todd's; rest not)
% bank and dupont 3
% braess and hackbush 5
% cai, mandel, and mccormick 1997  14?
% braess, brandt, yserentant, trottenberg, and others?
% others from briggs bib?

Lastly, we might try to modify the upscaling preconditioner so that it itself is
a smoother.  Also, the upscaling preconditioner might be modified for use in
discontinous Galerkin (DG) methods, or the expanded mixed method~\cite{AWY_1997}.

%----------------------------------------------
\section{Change the Problem, Not the RHS: \\
    Using nonlinear optimization techniques to adjust the basis of the mixed
    variational multiscale method}
\label{sec:nonlinear}
%----------------------------------------------

In constructing the subgrid upscaling, the ``closure'' assumption in upscaling
was that all flow between coarse cells occurred on the coarse scale.  Then, in
approximating the coarse velocity space, no (or a limited) variation of the flow
is allowed along coarse cell edges (RT0 elements have a constant flux through
cell boundaries, BDDF1 elements only a linearly-varying one).  Since the
fine-scale flow almost always varies along coarse cell edges, our upscaled
solution cannot coincide with the fine-scale solution (that is, usually $\uh
\notin \VHh$ even though $\ph \in \WHh = \Wh$ always for RT0 elements).

Our goal is to obtain the fine-scale solution (and to do so without entailing the
work of finding it more directly).  In the two-level scheme, the ``defect'' of
coarse grained inter-coarse-cell flow was overcome by pairing the upscaler with
a fine scale smoother.  We now consider instead modifying the discretization of
the upscaled solution so as to permit the fine-scale and upscaled solutions to
coincide (that is, find a $\VtwidHh$ that is structured like $\VHh$ and has
$\uh = \utwidHh \in \VtwidHh$).

In approximating the upscaled solution, all the pressure degrees of freedom are
left in and some velocity ones are discarded (as compared to the fine-scale
approximation).  All of the discarded degrees of freedom are along coarse cell
boundaries --- to recall, see the differences between
Figures~\ref{fig:fine_basis} and~\ref{fig:upscale_basis}.
Figure~\ref{fig:mod_vel_basis} shows a sample modified basis element.  In it are
reintroduced exactly those degrees of freedom (call them $\beta_i$'s)
along coarse cell boundaries that are ``missing'' from the basis for the
upscaled approximation.  This gives a parameterized family
$\VtwidHh(\bvec{\beta})$ of approximation spaces of $\mathbf{V}$ that can be
used in the upscaling scheme.  In the decomposition $\VHh = \VH \oplus \dlVh$
just change $\VH$ to $\VtwidH$, leave $\dlVh$ alone, and set $\VtwidHh = \VtwidH
\oplus \dlVh$.  With this new approximation space $\VtwidHh$ we can solve
upscaled flow problems just as before (thinking of $\bvec{\beta}$ as fixed for
any particular problem).

If the extra degrees of freedom $\bvec{\beta}$ are set uniformly, the same old
upscaled basis results; that is, $\VtwidHh(\mathbf{1})=\VHh$.  If for a given
problem the extra degrees of freedom are set in proportion to the fine-scale
solution to that problem, the modified upscaled problem will then have the same
solution as the fine-scale problem.


\begin{figure}[htbp]
\centerline{
\begin{tabular}{c c}
\includegraphics[keepaspectratio=true,width=2.5in]{nonlinear/ramp-schematic} &
\includegraphics[keepaspectratio=true,width=2.5in]{nonlinear/multiramp-schematic} \\
\includegraphics[keepaspectratio=true,width=3in]{nonlinear/ramp} &
\includegraphics[keepaspectratio=true,width=3in]{nonlinear/multiramp} \\
\end{tabular}}
\caption[Unmodified and modified velocity basis elements for coarse
  flow.]{Unmodified and modified velocity basis elements for coarse flow.  An
  ``X'' represents the coarse degree of freedom.  Circles represent degrees of
  freedom that are adjusted in the nonlinear optimization.  The filled circle
  represents the normalized degree of freedom.  The modified basis functions may
  be discontinuous across the dashed lines as well as across the outer coarse
  cell boundaries.}
\label{fig:mod_vel_basis}
\end{figure}


This raises a question: how should the coarse basis be adjusted to give a new
upscaled problem that has the fine-scale solution as its answer?  If given the
fine-scale solution, it could be used to apportion the extra coarse-scale
velocity degrees of freedom.  Of course, if the fine-scale solution were known,
there would be no more work to do.  However, if we have already solved some
modified upscaled problem, its fine-scale residual can easily be computed and
used as an error indicator.  That is, prolongate the modified upscaled solution
to the fine-scale space then compute the residual as usual.  (With RT0 elements
for the fine-scale basis and RT0/RT0 elements for the upscaled basis,
prolongation is the identity operation (on functions).  The modifications to the
upscaled basis are conforming to the fine-scale elements.)  The residual of the
velocity part of the equations is used
as an error indicator --- where the residual is large along a coarse cell edge,
the error in the approximate velocity is large --- to
determine how to reapportion the extra coarse-scale velocity degrees of freedom
to better approximate the fine-scale solution.  With the new $\bvec{\beta}$
another upscaled problem is set-up, and the process repeated.  Stop when the
residual is zero everywhere (or ``small'' to our satisfaction).  A diagram
illustrating the steps is shown in Figure~\ref{fig:beta_sequence}.


\begin{figure}[htbp]
%\centerline{
%\setlength{\tabcolsep}{0.25em}
%\begin{tabular}{r c l}
%{\setlength\unitlength{1em}
%\begin{picture}(1,4)
%\put(1,0){\line(-1,0){1}}
%\put(0,0){\line(0,1){1}}
%\put(0,1){\vector(1,0){1}}
%\end{picture}} &
%\begin{tabular}{l l}
%\textcolor{green}{\textbf{START:}} & Set $\bvec{\beta}=\mathbf{1}$. \\
%\textcolor{green}{\textbf{TEST:}} & Is $\mathbf{F}(\bvec{\beta})\approx\mathbf{0}$? \\
%\textcolor{green}{\textbf{UPDATE:}} & $\bvec{\beta}\leftarrow\bvec{\beta}+\mathbf{NC}(\mathbf{F}(\bvec{\beta}))$ \\
%\textcolor{green}{\textbf{DONE:}} & $\uh=\mathrm{P}\utwidHh$ \\
%\end{tabular} &
%{\setlength\unitlength{1em}
%\begin{picture}(1,4)
%\put(0,1){\line(1,0){2}}
%\put(1,1){\line(0,-1){1}}
%\put(1,0){\vector(-1,0){1}}
%\put(2,1){\line(0,-1){2}}
%\put(2,-1){\vector(-1,0){2}}
%\end{picture}} \\
%\end{tabular}}
\centerline{\parbox{4.5in}{
0.  \textcolor{green}{\textbf{Start}} with $m=0$ and $\bvec{\beta}^0=\mathbf{1}$. \\
1.  \textcolor{green}{\textbf{Solve}} an upscaled problem with the spaces
$\WHh\ni\pHh^m$ and $\VtwidHh(\bvec{\beta}^m)\ni\utwidHh^m$. \\
2.  \textcolor{green}{\textbf{Compute}} the velocity residual $\mathbf{r}_h^m$ of $P\utwidHh^m$. \\
3.  \textcolor{green}{\textbf{Scale}} the residual
$\mathbf{s}_h^m=\omega\fdmatrix{D}^{-1}\mathbf{r}_h^m$ using the diagonal of the
matrix $\fdmatrix{D}$ from the fine-scale term $(\fdmatrix{k}\uh,\vh)$. \\
4.  \textcolor{green}{\textbf{Test}} if $\mathbf{s}_h^m \cdot \mathbf{r}_h^m$ is
small.  If so, stop. \\
5.  \textcolor{green}{\textbf{Restrict}} the scaled residual
$c^m=R\mathbf{s}_h^m$ to coarse cell edges. \\
6.  \textcolor{green}{\textbf{Update}} parameters
$\bvec{\beta}^{m+1}=\bvec{\beta}^m+C(c^m)$ with a function $C$ of the corrector
$c^m$.  Increment the counter $m\leftarrow m+1$. \\
7.  \textcolor{green}{\textbf{Repeat}} the process starting at step 1 with the
new $\bvec{\beta}$. \\
}}
\caption[Corrector scheme for $\bvec{\beta}$.]{Corrector scheme for
  $\bvec{\beta}$.  In it, solve a sequence of upscaled problems each with a
  different $\bvec{\beta}^m$.  The goal is to have
  $\bvec{\beta}^m\rightarrow\bvec{\beta}^*$.  The prolongation
  $P:\VtwidHh\rightarrow\Vh$ is simply the identity operation if RT0 elements
  are used throughout.  The restriction $R:\Vh\rightarrow\Lambda_{H,h}$ gives
  the trace of the normal component of the velocity along coarse cell edges.
  The space $\Lambda_{H,h}$ is like the Lagrange multiplier space
  of~\cite{Arnold_Brezzi_1985} but with $h$-scale variation on coarse cell edges
  of size $H$.  The corrector function $C$ may have many forms (linear or
  nonlinear); see the discussion in the text.}
\label{fig:beta_sequence}
\end{figure}


There are some choices in how the coarse basis functions are updated.  The
simplest one perhaps is a Jacobi-like correction of the heights:
\begin{equation}
\bvec{\beta} \leftarrow \bvec{\beta} + R \omega \fdmatrix{D}^{-1} \fdvector{r}
\label{eqn:jacobi_like_corrector}
\end{equation}
where $\bvec{\beta}$ is the vector of coarse basis heights, $\omega$ is a
relaxation parameter, $\fdmatrix{D}$ is the diagonal of the (fine) Darcy matrix
$(\fdmatrix{k} \mathbf{v}_i, \mathbf{v}_j)$ used to scale the residual properly,
and $\fdvector{r}$ is the residual velocity (that is, use $C=I$ in the scheme in
Figure~\ref{fig:beta_sequence}).  However, there is a whole other class of
updates we could use: Newton-like schemes~\cite{Dennis_Schnabel_1996}.  The
residual is a rational polynomial function of $\bvec{\beta}$, and we seek a zero
of the function $\fdvector{r}(\bvec{\beta})$.  Newton's method could be applied
directly; this would require solving a linear system involving the Jacobian of
the residual --- a sparse linear system with as many unknowns as degrees of
freedom in the modified coarse basis functions (roughly $2 H/h$ times the number
of coarse cells in 2-D, or $3 (H/h)^2$ in 3-D).  Alternatively, a quasi-Newton
method could be used with an approximation to the Jacobian that is more easily
factored (such as its diagonal or a block diagonal form defined by coarse cell
edges).  Also possible is Broyden's (secant) method; to initialize the
approximation of the Jacobian, there is the exact Jacobian (at the starting
point), the exact Jacobian at the fine-scale solution (if this proves to have a
special structure), an approximation to the Jacobian, or simply the identity.
Initializing with the Jacobian at the starting point would give the same first
step as Newton's method, but different later steps.  Initializing with the
identity would give the same first step as the Jacobi-like scheme, but different
later steps.

Why go through all this trouble?  Newton's method has quadratic convergence.
Each step in the iteration requires only solving upscaled systems (which is
virtually like solving a coarse system) and performing fine-scale matrix-vector
multiplications.  Even Broyden's method has superlinear convergence whereas with
linear problems and linear iterative solvers we expect only linear convergence.
Of course, such fast convergence is only guaranteed starting from a small
distance away from the solution sought.  Here we are (almost) saved:
initializing with uniform heights gives the ordinary upscaled solution, and we
have an a-priori error analysis which says that this solution is already
``close'' to the fine-scale solution.  It is not known whether this is always
close enough to guarantee convergence.
%Some numerical experiments have indicated, though, that this is not close
%enough, and that we may fail to converge (see Figures~\ref{fig:UNK,fig:UNK}).
Even if the starting point of uniform heights is not good enough to guarantee
convergence, some help may be provided by already known trust-region,
line-search, or other global-convergence-aiming modifications to the Newton
scheme.  At least for sure there is only one root to find: we assume that the
fine-scale problem has a unique solution; thus there is only one way to
apportion the heights in the modified coarse basis (define $\bvec{\beta}^*$ so
that $\uh \in \VtwidHh(\bvec{\beta}^*)$).

There is only one way to apportion the heights, but how the scale is chosen is
left open.  For the ordinary, Lagrangian elements, we have height one everywhere
along the coarse edge; the solved-for unknown in the algebraic problem is the
magnitude and direction.  If on the other hand we have varying heights along the
edge, there is some redundancy if we are allowed to specify the absolute height
of each segment along with the solved-for unknown.  In order to obtain a unique
set of heights and resolve the redundancy, a normalization of the heights in the
basis is necessary.  Here again are some choices; the maximal magnitude set to
one, sum of squares set to one, or some other strategy are possibilities.

The choice of normalization has some subtle effects on the analysis of the
problem.  When adjusting the heights using the residual flux along coarse edges,
there is a correspondence between the segments along which we compute the
residual and segments where we can adjust the basis.  Introducing normalization
complicates this.  If on the one hand we choose the maximum strategy, when we
compute the residual function we can just pay attention to the non-maximal
degrees of freedom.  Also, as we get close to the root of the residual function
the normalization does not change.  However, far from the root, a step in the
algorithm may force a change in normalization and which components of the
residual are computed.  If on the other hand we choose the sum-of-squares
strategy, the normalization always changes (even near the root).  Further, the
residual function is harder to characterize: on a given coarse edge, inputs are
points on the sphere, and outputs are anywhere in space.  Updates need to keep
to the sphere (which is not difficult computationally, but complicates the
analysis; maybe some methods of updating are not even viable).  Nonetheless, it
does have some advantages (see the example below, and its discussion of
semi-definiteness in the Jacobian), and a certain analytic appeal.

One other note: among normalization choices, the simplest would be to pick a
particular $\beta_i$ to fix on each coarse edge.  However, this (usually) won't
work.  The true solution may have zero flux on that segment of the coarse edge
giving an analytic problem.  Also, the true solution may have near zero flux
there giving computational problems since the scale is exaggerated (problems
such as poor conditioning, overflow, and cancellation errors).  Other choices of
normalization that exhibit such behavior would be considered poor choices; also,
not normalizing during a computation (to save time) could lead to similarly bad
behavior.  A good choice of normalization should guarantee to not worsen the
condition number (and perhaps improve it).

The use of upscaling (and not more usual finite element methods) is important
here.  There are otherwise too many degrees of freedom in the nonlinear part of
the problem when using simple coarsening.  This is true even in two dimensions
with $H/h=2$; the situation gets worse for larger $H/h$ and in three dimensions.
Upscaling of course is not necessary, but the general intuition is that
nonlinear problems get harder to solve more quickly than linear ones as the
number of unknowns increases.
%Why is mixed subgrid upscaling important?
%Subgrid problems with Neumann conditions on coarse cell problems.  Coarse
%velocities are where we lose info: overlap of extra DOFs in the non-mixed case.

We digress for a moment to show an example of the ideas above, and to shed some
light on normalization and its effects.  Let us apply these ideas to Laplace's
equation in two dimensions on the unit square.  Assume there are homogeneous
Neumann boundary conditions, and use a regular grid with only two coarse cells
(so that there is only one coarse edge with flow).  Suppose that there is a
three-by-three subgrid in each coarse cell to make for a six-by-three grid of
fine cells, and that RT0 elements are used (for the coarse and subgrid scales in
the upscaled problem, and everywhere in the fine-scale problem).  We will ignore
the gravity term.  The source term $q$ was chosen to be randomly either 0 or 1
on fine cells.

%   example: 1-D Laplacian w/linear elements and $H/h = 2$
%      bad example!  overlap in DOFs
%      why upscale? (even in 1-D with $H/h>2$; gets worse in 2-D and 3-D)
%Let us apply these ideas to Laplace's equation in one dimension.  We want to
%approximate the solution to $(u',v')=(f,v)$ for every $v \in H^1_0(0,1)$.  We will use
%linear finite elements on a regular grid.  Let $V_h$ be the subset of
%$H^1_0(0,1)$ of continuous piecewise linear functions (with $h=1/n$ for some
%\emph{even} integer $n$).  For the Lagrangian basis for $V_h$ we get
%$(v_i,v_i)=2/h$, $(v_i,v_{i\pm 1})=-1/h$, and $(v_i,v_j)=0$ for
%$j\neq i-1,i,i+1$.

The analytic form of the objective function
\begin{equation}
\mathbf{F} = (2/3) \fdmatrix{D}^{-1} \fdvector{r}
\label{eqn:sample_obj_fcn}
\end{equation}
is $\left(14175(\beta_1+\beta_2+\beta_3)^2 \right)^{-1}$ times
\begin{equation}
\left(
\begin{array}{c}
-4140\beta_2^2 -1939\beta_2\beta_3 -8633\beta_3^2 +\beta_1 (6694\beta_2+10133\beta_3) \\
2 (-3347\beta_1^2 +(2445\beta_2-2972\beta_3) \beta_3 +\beta_1 (2070\beta_2+3064\beta_3)) \\
-10133\beta_1^2 +2\beta_2 (-2445\beta_2+2972\beta_3) +\beta_1 (-4189\beta_2+8633\beta_3) \\
\end{array}
\right).
\label{eqn:obj_fcn}
\end{equation}
(This definition is slightly different than discussed above; it includes the
scaling effect of $\mathbf{D}$.)  Then the root $\bvec{\beta}^*$ defined
by $\mathbf{F}(\bvec{\beta}^*)=\mathbf{0}$ has
\begin{equation}
\beta_1^* = \frac{179797}{223672} \beta_3^*
\qquad\mbox{and}\qquad
\beta_2^* = \frac{290873}{447344} \beta_3^*.
\label{eqn:obj_fcn_roots}
\end{equation}
This $\bvec{\beta}^*$ is the same one that gives us $\uh \in
\VtwidHh(\bvec{\beta}^*)$.  The level curves for $\beta_3=1$ and $F_i=0$ are
shown in Figure~\ref{fig:obj_fcn_level_curves}.  In order to detect faux roots,
some ``vertical'' scale is necessary.  As a reference, the uniform weights
$\bvec{\beta}=\mathbf{1}$ are reasonable, and
\begin{equation}
\mathbf{F}(\mathbf{1})=(\frac{47}{2835},\frac{8}{405},-\frac{103}{2835})^{\mathrm{T}}.
\label{eqn:obj_fcn_ref_levels}
\end{equation}
Level curves with $F_i$ equal to 0 and plus or
minus 10\% the values in the reference are shown in
Figure~\ref{fig:obj_fcn_faux_roots}.  Using this criterion to detect whether a
Newton-like scheme might be fooled by values of the objective function near
zero, there seem to be no faux roots.


\begin{figure}[htbp]
\centerline{
\begin{tabular}{c c c}
\includegraphics[keepaspectratio=true,width=2.25in]{nonlinear/level-curves-big} &
\includegraphics[keepaspectratio=true,width=2.25in]{nonlinear/level-curves-med} &
\includegraphics[keepaspectratio=true,width=2.25in]{nonlinear/level-curves-sm} \\
\end{tabular}}
\caption[Zero curves for the objective function.]{Level curves for each
  component of the objective function in~\eqref{eqn:obj_fcn} with
  $\beta_3=1$.  Only the zero level is shown.  The curves are shown at three
  different magnifications; at larger magnifications the hyperbolic-like curves
  simply extend their trends (they do not intersect).  There is only one
  location where the curves from all three components intersect.  For reference,
  the level curve of the third component is the circular curve.  The level curve
  of the first component touches the top-center of the middle figure; the second
  touches top-center.}
\label{fig:obj_fcn_level_curves}
\end{figure}


\begin{figure}[htbp]
\centerline{
\begin{tabular}{c c}
\includegraphics[keepaspectratio=true,width=3in]{nonlinear/faux-roots-med} &
\includegraphics[keepaspectratio=true,width=3in]{nonlinear/faux-roots-sm} \\
\end{tabular}}
\caption[Faux roots of the objective function.]{Level curves for each
  component of the objective function in~\eqref{eqn:obj_fcn} with
  $\beta_3=1$.  Curves at the zero level as well as $\pm10\%$ the reference
  level in~\eqref{eqn:obj_fcn_ref_levels} are shown.}
\label{fig:obj_fcn_faux_roots}
\end{figure}


The Jacobian of the objective function $\mathbf{F}$ from~\eqref{eqn:obj_fcn} is
$\left(14175(\beta_1+\beta_2+\beta_3)^3\right)^{-1}$ times %the first column
\begin{multline}
\left(
\begin{array}{c}
14974 \beta_2^2 + 20705 \beta_2 \beta_3 + 27399 
\beta_3^2 - \beta_1 (6694 \beta_2 + 10133 \beta_3) \\
%
6694 \beta_1^2 + \beta_3 (-6341 \beta_2 + 15327 \beta_3) - 
\beta_1 (14974 \beta_2 + 15511 \beta_3) \\
%
10133 \beta_1^2 + \beta_2 (6341 \beta_2 - 15327 \beta_3) - 
\beta_1 (5194 \beta_2 + 27399 \beta_3) \\
\end{array}
\right. \\
%\intertext{the second column}
\begin{array}{c}
4140 \beta_2^2 + 488 \beta_2 \beta_3 + 18016 \beta_3^2 - 
28 \beta_1 (626 \beta_2 + 697 \beta_3) \\
%
2 (8764 \beta_1^2 - \beta_1 (2070 \beta_2 + 1613 
\beta_3) + \beta_3 (-2445 \beta_2 + 8389 \beta_3)) \\
%
2 (9758 \beta_1^2 + \beta_1 (1369 \beta_2 - 9008 \beta_3) + 
\beta_2 (2445 \beta_2 - 8389 \beta_3)) \\
\end{array} \\
%\intertext{and the third column}
\left.
\begin{array}{c}
5591 \beta_2^2 - 7444 \beta_2 \beta_3 + 8633 \beta_3^2 - 
3 \beta_1 (5359 \beta_2 + 9633 \beta_3) \\
%
16077 \beta_1^2 + 4 \beta_3 (-3931 \beta_2 + 1486 \beta_3)
- \beta_1 (5591 \beta_2 + 15511 \beta_3) \\
%
28899 \beta_1^2 + \beta_1 (22955 \beta_2 - 8633 \beta_3) + 
4 \beta_2 (3931 \beta_2 - 1486 \beta_3) \\
\end{array}
\right).
\end{multline}
%6694 \beta_2 + 10133 \beta_3 &
%-13388 \beta_1 + 4140 \beta_2 + 6128 \beta_3 &
%-20266 \beta_1 - 4189 \beta_2 + 8633 \beta_3 \\
%6694 \beta_1 - 8280 \beta_2 - 1939 \beta_3 &
%4140 \beta_1 + 4890 \beta_3 &
%-4189 \beta_1 - 9780 \beta_2 + 5944 \beta_3 \\
%10133 \beta_1 - 1939 \beta_2 - 17266 \beta_3 &
%6128 \beta_1 + 4890 \beta_2 - 11888 \beta_3 &
%8633 \beta_1 + 5944 \beta_2 \\
At the ``initial'' value of uniform weights $\bvec{\beta}=\mathbf{1}$, the
Jacobian has eigenvalues of $0$, $\frac{13}{135}$, and $\frac{9383}{42525}$.  At
the root of the objective function~\eqref{eqn:obj_fcn_roots}, the
Jacobian has eigenvalues $0$, $\sim\!\!0.120153$, and $\sim\!\!0.272359$ divided
by $\beta_3$ (that is, the eigenvalues depend on the scaling/normalization
scheme).  The Jacobian is indefinite because we have not normalized the
$\bvec{\beta}$'s, but otherwise the Jacobian is positive (semi-)definite.
Whether or not the Jacobian is positive semi-definite everywhere remains
unknown; for sure, the Jacobian has a zero eigenvalue everywhere.

The convergence history of the Jacobi-like scheme
of~\eqref{eqn:jacobi_like_corrector} is shown in
Figure~\ref{fig:jacobi_conv_hist}.  The linear rate of convergence of the scheme
is apparent.  The approach of the iterates towards the root (shown at right)
seems to be orthogonal to the level curve of the second component.  The
significance of this (if any) is not known.


\begin{figure}[htbp]
\centerline{
\begin{tabular}{c c}
\raisebox{0.75in}[0pt]{\includegraphics[keepaspectratio=true,width=3in]{nonlinear/jacobi-like-conv-hist}} &
\includegraphics[keepaspectratio=true,width=3in]{nonlinear/jacobi-like-conv-graph} \\
%\includegraphics[width=3.25in,height=2.75in]{nonlinear/jacobi-like-conv-hist} &
%\includegraphics[keepaspectratio=true,height=2.75in]{nonlinear/jacobi-like-conv-graph} \\
\end{tabular}}
\caption[Convergence of the Jacobi-like scheme.]{Convergence of the Jacobi-like
  scheme of~\eqref{eqn:jacobi_like_corrector} as applied to the sample
  problem.  The left diagram shows the base-10 logarithms of the $l_1$ size of
  the error in $\bvec{\beta}$ (top line) and values of the corresponding
  objective function $\mathbf{F}(\bvec{\beta})$ (bottom line) versus iteration number.
  The right diagram plots the points $(\beta_1,\beta_2)$ for each iteration
  ($\beta_3$ has been normalized to 1 --- it is the maximal component for every
  iteration).  Level curves for each component of $\mathbf{F}$ are shown for reference.}
\label{fig:jacobi_conv_hist}
\end{figure}


The convergence history of Newton's method is shown in
Figure~\ref{fig:newton_conv_hist}.  The quadratic rate of convergence of the
scheme is apparent.  Note the vertical scale: after just 10 iterations
$\bvec{\beta}$ has a mean-square error of just $10^{-640}$!  In applying
Newton's method, there is the problem of semi-definiteness of the Jacobian.  To
overcome this, the pseudo-inverse was used instead.
% 2-norm 'cuz of the pseudo-inverse
Also, no graph of the iterates is shown (to mirror the right diagram of
Figure~\ref{fig:jacobi_conv_hist}) because the iterates converge too quickly.
At any given magnification, only one iterate at a time is distinguishable from
the root (the intersection of the zero-level curves).


\begin{figure}[htbp]
\centerline{
\includegraphics[keepaspectratio=true,width=4in]{nonlinear/newton-conv-hist}
}
\caption[Convergence of Newton's method.]{Convergence of Newton's method as
  applied to the sample problem.  The diagram shows on a semilog scale the $l_2$
  size of the error in $\bvec{\beta}$ versus iteration number.}
\label{fig:newton_conv_hist}
\end{figure}


In the near future, diagrams showing the convergence of the scheme for problems
with an arbitrary permeability will be possible.  Problems with a gravity term
will be included, too.  In addition to the Jacobi-like scheme and Newton's
method, a quasi-Newton using the diagonal of the Jacobian will be implemented.
Broyden's method initialized the Jacobian at starting (and ending) point, the
diagonal of the Jacobian at the same, and identity matrix will be implemented as
well.

To summarize, the proposed research for this topic is to develop tools necessary
to prove that this algorithm converges, and can be implemented in a manner that
is efficient, accurate, and stable.  For convergence, we need to be able to
measure progress made in an iterative scheme: what is the relationship between
the extra degrees of freedom in the modified coarse basis and the error in the
computed upscaled solution.  Along the same lines, we need to better measure the
closeness of the upscaled and fine solutions (both unmodified and modified); how
close is the starting point in the nonlinear optimization?  We also need to know
that our approximation to the Jacobian possesses bounded deterioration.

The approximation to the Jacobian is also key to the efficiency of the method:
iteration counts and iteration cost are affected.  Can we get away with a secant
method for simplicity?  And initialize it with the identity or the diagonal of
the Jacobian (at the initial or final points)?  If we use a quasi-Newton method,
how sparse an approximation of the Jacobian can we use?  Diagonal, block
diagonal from coarse cell edges, or the full Jacobian?  These all affect the
storage required, the parallelism we can exploit, and the amount of arithmetic
necessary per iteration and per simulation.  Features of conjugate gradient,
truncated Newton, Krylov subspace, and limited-memory methods~\cite{Kelley_1995}
are applicable here.  Further, can we exploit the analytic form of the residual
to speed finding the root?  Lastly, how does normalization of the extra degrees
of freedom impact the performance of the algorithm as regarding global
convergence (if we start close, it does not matter) and implementation?  How
does normalization affect the semi-definiteness of the Jacobian?  In the update
step (step six of Figure~\ref{fig:beta_sequence}), can we update using any size
corrector and normalize the result?  That is, the normalized $\mathbf{\beta}$'s
are not members of a vector space; so what algebra of the ``+'' in the update is
appropriate?  % multiplication is the natural operation on SU(2)

% figure: uzawa-like schematic
% what does this have to do with saddle points?  anything?
%
% another way to pose the problem
%   look at fine-scale basis and segregate the upscaled DOFs from the non-upscaled
%   diagram from third pink sheet in folder

Lastly, this nonlinear optimization idea might well be applied to DG and the
expanded mixed method.  It might also be applied to making $hp$-refinements: let
newly-added degrees of freedom from a refinement be the $\bvec{\beta}$'s.  The
solution of the unrefined problem is the initial guess for Newton's method.

%----------------------------------------------
\section{Applications to Modeling Flow in Porous Media}
\label{sec:applications}
%----------------------------------------------

Efficient, accurate Darcy flow
simulations~\cite{Peaceman_1977,Chavent_Jaffre_1986,Lake_1989} are the prime
motivation for this research.  However, in order to make this research useful to
practitioners, the performance of the proposed algorithms in different
geostatistical settings~\cite{Sahimi_1995,Deutsch_Journel_1997} needs to be
investigated.  That is, a guide for practitioners' use needs to be developed by
running our code on a number of geostatistical realizations of permeability
fields, and analyzing the performance for strengths and weaknesses as a function
of geostatistical (and other physical) parameters.

%description of geostatistical settings.
%Lake's EOR~\cite{Lake_1989}?
%sahimi_1995
%GSLIB~\cite{Deutsch_Journel_1997}?
%Guy Chavent and Jerome Jaffre.
% Mathematical models and finite elements for reservoir simulation : single phase,
% multiphase, and multicomponent flows through porous media.  New York: Elsevier
% Science Pub. Co., 1986.  SERIES: Studies in mathematics and its applications. v. 17
% Chavent_Jaffre_1986
%Peaceman_1977 TN 871 P37 Geology Library
%Gordon W. Thomas.
% Principles of hydrocarbon reservoir simulation
% Boston : International Human Resources Development Corp., c1982.
% TN 871 T45 1982 Engineering Library
%Henry B. Crichlow
% Modern reservoir engineering : a simulation approach
% Englewood Cliffs, N.J. : Prentice-Hall, c1977.
% TN 871 C694 Geology Library

Performance can be measured by how quickly and how well large-scale features of
the flow can be approximated by the algorithms.  What effect, if any, the
geostatistics of the problem data have on this performance needs to be
described.  Also, if long-range correlations in the problem data present any
theoretical or practical difficulties for the algorithms, this should be noted
and detailed.

Performance can also be measured by how accurately the algorithms predict
break-through times in two-phase simulations.  In some sense, the algorithms
will always be successful because they produce the same flow field as a full
fine-scale simulation.  However, the algorithms are iterative in nature so they
introduce a truncation error on top of that of the discretization.  Whether or
not this has a strong effect on predicted break-through times needs to be
investigated.

It would also be worthwhile to draw some engineering conclusions about the
effect of long-range correlations on flow, or perhaps the particular flow
features of a few specific natural formations.

Lastly, as a matter for testing, evaluation, and practical use, a
well-documented implementation of the proposed algorithms in parallel for three
dimensional problems is necessary.  Perhaps it can be extended to handle
two-phase miscible and/or immiscible displacements.  It should be able to model
both rate wells and Peaceman bottom-hole-pressure
wells~\cite{Peaceman_1978,Peaceman_1983}.

%----------------------------------------------
\section{Software}
\label{sec:software}
%----------------------------------------------

Demonstration software has been developed to solve systems using the ideas
in this proposal.  Additional software was developed to produce the
eigenstructure diagrams for the two-level scheme and the analytic expressions
for the residual in the nonlinear optimization scheme.

Although some of this software is fully parallel and can work on problems in
three dimensions, not all of the software can.  Also, not all the software
exploits the sparsity or the special structure of the upscaling problems.

As part of the dissertation, it is proposed to further the development of the
software already produced.  This includes a stand-alone package to solve
elliptic problems (this package could also be integrated into
Parssim~\cite{arbogast_parssim1} or IPARS~\cite{WYWADPS_1997,WABELPY_1999}), and
analysis tools for the practitioner (along the lines of the eigenstructure
diagrams) to help guide the use of and parameter setting in the solver.  This
software should be fully parallel and be able to handle three dimensional
problems.  It should exploit the sparsity and special structure of the methods.
Lastly, it should be modular, readable, and (self-)documented.

%----------------------------------------------
%\section{References}
%\label{sec:refs}
%\addtocontents{toc}{References}
\addcontentsline{toc}{section}{References}
%in preamble: \bibliographystyle{plain}
\bibliography{proposal,arbogast_cv,arbogast_refs}
%----------------------------------------------


\end{document}


\relax
\bye