\documentclass[12pt]{book}
\usepackage{amsmath,amssymb,amsthm,amsbsy,amsfonts}
%\usepackage{mathrsfs} % Fancy script letters with \mathscr{}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% macros
\newcommand{\define}[1]{{\bf \boldmath{#1}}}
\newcommand{\vdot}{\cdot} %{{\, \boldsymbol \cdot\, }}
\newcommand{\curl}{{\nabla\cross}}
\newcommand{\grad}{{\nabla}}
\newcommand{\qdot}{\dot{q}}
\newcommand{\pa}{\partial}
\newcommand{\gapbelow}{\raisebox{-3pt}[0pt][6pt]{}}
\newcommand{\gapabove}{\raisebox{3pt}[15pt][0pt]{}}
\newcommand{\gapleft}{\makebox[12pt]{}}
\newcommand{\gapright}{\makebox[12pt]{}}
\newcommand{\eqngapabove}{\raisebox{15pt}[20pt][0pt]{}}
\newcommand{\eqngapbelow}{\raisebox{-15pt}[0pt][15pt]{}}
\newcommand{\pdot}{\dot{p}}
\newcommand{\vect}[1]{\mathbf{#1}}
\newcommand{\ssz}{\scriptsize}
\newcommand{\norm}[1]{\lVert#1\rVert}
\newcommand{\fvect}[1]{\text{\textsf{\textit{#1}}}} %For 4-vectors.
\newcommand{\be}{\begin{equation}}
\newcommand{\ee}{\end{equation}}
\newcommand{\dint}{\displaystyle\int} % Use for enforcing large
% integral symbol eg. in arrays.
%% Included in above \input header are:
%%
\usepackage{amsmath,amsthm}
%\usepackage{mathrsfs} % Fancy script letters with \mathscr{}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{wrapfig}
%\usepackage{lscape}
%\usepackage{wasysym}
%\usepackage{textcomp}
%%
%% XY-Pic Options
%%
\usepackage[matrix,arrow,curve,arc]{xy}
%%
%%Set the default xypic coordinate system.
%%
%\everyxy={0:<1cm,0cm>}
%%
%% Graphics Package Options
%%
\usepackage{graphicx}
\usepackage{color}
%% Image file formats accepted for graphicx:
\usepackage{psfrag}
\DeclareGraphicsExtensions{.eps,.ps}
%\DeclareGraphicsExtensions{.pdf,.png,.jpg}
\usepackage{subfigure}
%%
%% BibTeX Packages?
%%
%\usepackage{apacite}
%%
%% Fancy Headers
%%
\usepackage{fancyhdr}
\pagestyle{fancy}
%%
%% PDF Options: Recommend NOT USING dvippdf, use pdflatex instead.
%% Alternatively, is you are using .eps graphics use the hyperref
%% [ps2pdf] option and run latex-dvips-ps2pdf
%%
% Thumbnails is not needed if using the dvipdfm package.
%\usepackage{thumbpdf}
\usepackage[dvips,ps2pdf,pdftitle={Classical Mechanics},
pdfauthor={John C. Baez and Derek K. Wise},
pdfsubject={classical mechanics},
pdfkeywords={physics, classical mechanics, Lagrangians, action principle, Hamiltonian, Lie groups, relativity},
pdfpagemode={UseOutlines},
bookmarks,bookmarksopen,pdfstartview={FitH},
colorlinks,linkcolor={blue},citecolor={red}]{hyperref}
% formatting
%\raggedright
%
%\setlength{\topmargin}{0pt}
%\setlength{\textheight}{4.5in}
%\setlength{\paperheight}{5.2in}
%\setlength{\voffset}{-1.2in}
%
%\setlength{\textwidth}{3.8in}
%\setlength{\paperwidth}{4.2in}
%\setlength{\hoffset}{-1in}
%
%\setlength{\oddsidemargin}{.2in}
%\setlength{\evensidemargin}{.2in}
%\setlength{\topmargin}{0in}
%
%
%
%
%\setlength{\parindent}{2ex}
\setlength{\textwidth}{16cm}
\setlength{\textheight}{21cm} % For US Letter
\setlength{\oddsidemargin}{0.51cm}
\setlength{\evensidemargin}{0cm}
\setlength{\topmargin}{0.0cm}
\setlength{\headsep}{1.5cm}
\setlength{\footskip}{1cm}
\headheight=15pt
\setlength{\headwidth}{\textwidth}
\addtolength{\headwidth}{\marginparsep}
%%
%% remember chapter title
%%
\renewcommand{\chaptermark}[1]{\markboth{\sl\ #1}{\sl\thechapter.\ #1}}
\renewcommand{\sectionmark}[1]{\markright{\sl\thesection\ #1}}
% section number and title
%%
%% Custom header and footers
%%
\lhead[\fancyplain{}{\sl\thepage}]{\fancyplain{}{\sl\rightmark}}
\rhead[\fancyplain{}{\sl\leftmark}]{\fancyplain{}{\sl\thepage}}
\cfoot{}
\renewcommand{\headrulewidth}{0pt}
\newcommand{\cross}{\times}
\setcounter{secnumdepth}{2}
%%
%% Custom Symbols
%%
\newcommand{\defeq}{\, :=\,}
%%
%% Theorems, Examples, and Definitions
%%
\newtheorem{theorem}{Theorem}[chapter]
\newtheorem{definition}{Definition}[chapter]
%
% Make distinct Chapter and Appendix headings.
\newcommand{\ch}[1]{%
\clearpage{\pagestyle{empty}\cleardoublepage} %makes sure no header opposite chapter start page.
\addtocounter{chapter}{1}
\chaptermark{#1}
\addtocontents{toc}{\contentsline {chapter}{\numberline {\arabic{chapter}} #1}{\arabic{page}}}
\makebox{} \vspace{3cm}\\ {\noindent \Large \textbf{Lecture\ \arabic{chapter}. \vspace{10pt} \\ \noindent {#1} \\}}
\rule{16cm}{1pt} \vspace{5ex} \\ }
%\begin{picture}(16,1)
% \setlength{\unitlength}{1cm}
% \put(0,1){\line(1,0){16}}
% \end{picture}}
%
%\newcommand{\app}[1]{%
% \makebox[1cm]{} \\%
% \chapter{#1}%
% \begin{picture}(16,1)%
% \setlength{\unitlength}{1cm}%
% \put(0,1){\line(1,0){16}}%
% \end{picture}}
%%
%\includeonly{}
\title{\textbf{Lectures on \\ Classical Mechanics}}
\author{\textbf{John C. Baez}
\\ \textbf{Derek K. Wise}
}
\date{(version of \today)}
\begin{document}
\maketitle
\pagenumbering{roman}
\frontmatter
\clearpage
\vspace{10ex} \copyright\ 2017 John C. Baez \& Derek K. Wise
\cleardoublepage
%%%%%%%%%%%%%%%%%%%%% PREFACE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section*{Preface}
\label{preface}
These are notes for a mathematics graduate course on classical
mechanics at U.C.\ Riverside. I've taught this course three times
recently. Twice I focused on the Hamiltonian approach. In 2005
I started with the Lagrangian approach, with a heavy emphasis on
action principles, and derived the Hamiltonian approach from that.
This approach seems more coherent.
Derek Wise took beautiful handwritten notes on the 2005 course,
which can be found on my website:
\begin{center}
\url{http://math.ucr.edu/home/baez/classical/}
\end{center}
\noindent
Later, Blair Smith from Louisiana State University miraculously
appeared and volunteered to turn the notes into \LaTeX.
While not yet the book I'd eventually like to write, the result
may already be helpful for people interested in the mathematics
of classical mechanics.
The chapters in this \LaTeX\ version are in the same order as the weekly
lectures, but I've merged weeks together, and sometimes split them
over chapters, to obtain a more textbook feel to these notes. For
reference, the weekly lectures are outlined here.
Week 1: (Mar. 28, 30, Apr. 1)---The Lagrangian approach to classical
mechanics: deriving $F = ma$ from the requirement that the particle's
path be a critical point of the action. The prehistory of the
Lagrangian approach: D'Alembert's ``principle of least energy'' in
statics, Fermat's ``principle of least time'' in optics, and how
D'Alembert generalized his principle from statics to dynamics using
the concept of ``inertia force''.
Week 2: (Apr. 4, 6, 8)---Deriving the Euler--Lagrange equations for a
particle on an arbitrary manifold. Generalized momentum and
force. Noether's theorem on conserved quantities coming from
symmetries. Examples of conserved quantities: energy, momentum and
angular momentum.
Week 3 (Apr. 11, 13, 15)---Example problems: (1) The Atwood
machine. (2) A frictionless mass on a table attached to a string
threaded through a hole in the table, with a mass hanging on the
string. (3) A special-relativistic free particle: two Lagrangians, one
with reparametrization invariance as a gauge symmetry. (4) A
special-relativistic charged particle in an electromagnetic field.
Week 4 (Apr. 18, 20, 22)---More example problems: (4) A
special-relativistic charged particle in an electromagnetic field,
continued. (5) A general-relativistic free
particle.
Week 5 (Apr. 25, 27, 29)---How Jacobi unified Fermat's principle of
least time and Lagrange's principle of least action by seeing the
classical mechanics of a particle in a potential as a special case of
optics with a position-dependent index of refraction. The ubiquity of
geodesic motion. Kaluza--Klein theory. From Lagrangians to
Hamiltonians.
Week 6 (May 2, 4, 6)---From Lagrangians to Hamiltonians,
continued. Regular and strongly regular Lagrangians. The cotangent
bundle as phase space. Hamilton's equations. Getting Hamilton's
equations directly from a least action principle.
Week 7 (May 9, 11, 13)---Waves versus particles: the Hamilton--Jacobi
equation. Hamilton's principal function and extended phase space. How
the Hamilton--Jacobi equation foreshadows quantum mechanics.
Week 8 (May 16, 18, 20)---Towards symplectic geometry. The canonical
1-form and the symplectic 2-form on the cotangent bundle. Hamilton's
equations on a symplectic manifold. Darboux's theorem.
Week 9 (May 23, 25, 27)---Poisson brackets. The Schr\"odinger picture
versus the Heisenberg picture in classical mechanics. The Hamiltonian
version of Noether's theorem. Poisson algebras and Poisson
manifolds. A Poisson manifold that is not symplectic. Liouville's
theorem. Weil's formula.
Week 10 (June 1, 3, 5)---A taste of geometric quantization. K\"{a}hler
manifolds.
If you find errors in these notes, please email me! I thank Sheeyun
Park and Curtis Vinson for catching lots of errors.
\vskip 1em
\hfill {John C.\ Baez}
\tableofcontents
\mainmatter
%%%%%%%%%%%%%%%%%%%%% CHAPTER 1%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\pagenumbering{arabic}
%%
%%
%%
\chapter{From Newtonian to Lagrangian Mechanics}
\label{ch:Foundations}
% \noindent (Week 1, March 28, 30, April 1.)\vspace{2ex}
Classical mechanics is a peculiar branch of physics with a long
history. It used to be considered the sum total of our theoretical
knowledge of the physical universe (Laplace's daemon, the Newtonian
clockwork), but now it is known as an idealization, a toy model if you
will. The astounding thing is that probably all professional applied
physicists still use classical mechanics. So it is still an
indispensable part of any physicist's or engineer's education.
It is so useful because the more accurate theories that we know of
(general relativity and quantum mechanics) make corrections to
classical mechanics generally only in extreme situations (black holes,
neutron stars, atomic structure, superconductivity, and so forth).
Given that general relativity and quantum mechanics are much harder
theories to apply, it is no wonder that scientists revert to classical
mechanics whenever possible.
So, what is classical mechanics?
%%
%%
%%
\section{Lagrangian and Newtonian Approaches}
\label{sec:Lagrangian_and_Newtonian}
We begin by comparing the Newtonian approach to mechanics to the
subtler approach of Lagrangian mechanics. Recall Newton's law:
\begin{equation}
F=ma
\end{equation}
wherein we consider a particle moving in $\mathbb{R}^n$.
Its position, say $q$, depends on time $t\in\mathbb{R}$,
so it defines a function,
\[
q \colon \mathbb{R}\longrightarrow\mathbb{R}^n.
\]
From this function we can define \textbf{velocity},
\[
v=\dot{q} \colon \mathbb{R}\longrightarrow\mathbb{R}^n
\]
where $\dot{q}=\frac{dq}{dt}$, and also \textbf{acceleration},
\[
a=\ddot{q} \colon \mathbb{R}\longrightarrow\mathbb{R}^n.
\]
Now let $m>0$ be the \textbf{mass} of the particle, and let $F$ be a
vector field on $\mathbb{R}^n$ called the \textbf{force}. Newton claimed
that the particle satisfies $F=ma$. That is:
\begin{equation}
m\,a(t)=F\left(q(t)\right).
\end{equation}
This is a 2nd-order differential equation
for $q\colon\mathbb{R}\rightarrow\mathbb{R}^n$ which will have a unique
solution given some $q(t_0)$ and $\dot{q}(t_0)$, provided the vector
field $F$ is `nice' --- by which we technically mean smooth and bounded
(i.e., $|F(x)| < B$ for some $B > 0$, for all $x\in\mathbb{R}^n$).
We can then define a quantity called \textbf{kinetic energy}:
\begin{equation}
K(t) \defeq \frac{1}{2}m\,v(t)\cdot v(t).
\end{equation}
This quantity is interesting because
\[
\begin{split}
\frac{d}{dt}K(t) &= m\,v(t)\cdot a(t) \\
&= F(q(t))\cdot v(t).
\end{split}
\]
So, kinetic energy goes up when you push an object in the direction
of its velocity, and goes down when you push it in the opposite
direction. Moreover,
\[
\begin{split}
K(t_1)-K(t_0) &= \int_{t_0}^{t_1} F(q(t))\cdot v(t)\,dt \\
&= \int_{t_0}^{t_1} F(q(t))\cdot \dot{q}(t)\, dt.
\end{split}
\]
So, the change of kinetic energy is equal to the \textbf{work} done by the
force, that is, the integral of $F$ along the curve
$q \colon [t_0,t_1]\rightarrow\mathbb{R}^n$. In 3 dimensions,
Stokes' theorem
relating line integrals to surface integrals of the curl implies
that the change
in kinetic energy $K(t_1)-K(t_0)$ is independent of the curve going
from $q(t_0)=a$ to $q(t_1)=b$ iff
\[
\curl F = 0.
\]
This in turn is true iff
\begin{equation}
F=-\grad V
\end{equation}
for some function $V\colon\mathbb{R}^n\rightarrow\mathbb{R}$.
In fact, this conclusion is true even when $n \ne 3$, using a more
general version of Stokes' theorem: the integral of $F$ along a curve
in $\mathbb{R}^n$ depends only on the endpoints of this curve iff $F =
-\grad V$ for some function $V$. Moreover, this function is then
unique up to an additive constant; we call this function the
\textbf{potential}. A force with this property is called
\textbf{conservative}. Why? Because in this case we can define the
\textbf{total energy} of the particle by
\begin{equation}
E(t)\defeq K(t) + V(q(t))
\end{equation}
where $V(t):=V(q(t))$ is called the \textbf{potential energy} of
the particle, and then we can show that $E$ is \textbf{conserved}:
that is, constant as a function of time. To see this, note that
$F = ma$ implies
\[
\begin{split}
\frac{d}{dt}\left[K(t)+V(q(t))\right] &= F(q(t))\cdot v(t) +
\grad V(q(t))\cdot v(t) \\
&= 0, \qquad\text{(because $F=-\grad V$)}.
\end{split}
\]
Conservative forces let us apply a bunch of cool techniques.
In the Lagrangian approach we define a quantity
\begin{equation}
L\defeq K(t)-V(q(t))
\end{equation}
called the \textbf{Lagrangian}, and for any curve
$q \colon [t_0,t_1]\rightarrow\mathbb{R}^n$ with $q(t_0)=a$, $q(t_1)=b$, we
define the \textbf{action} to be
\begin{equation}
S(q)\defeq \int_{t_0}^{t_1}L(t)\,dt.
\end{equation}
From here one can go in two directions. One is to claim that nature causes
particles to follow paths of least action, and derive Newton's equations
from that principle. The other is to start with Newton's principles and
find out what conditions, if any, on $S(q)$ follow from this. We will use the
shortcut of hindsight, bypass the philosophy, and simply use the
mathematics of variational calculus to show that particles follow paths
that are `critical points' of the action $S(q)$ if and only if Newton's
law $F=ma$ holds.
\begin{figure}[ht]
\centering
\psfrag{Rn}{$\mathbb{R}^n$}\psfrag{R}{$\mathbb{R}$}
\psfrag{q}{$q$}\psfrag{qs+sdq}{$q_s$}
\psfrag{t0}{$t_0$}\psfrag{t1}{$t_1$}
\includegraphics[width=.5\textwidth]{images/path-least-action}
\caption{A particle can sniff out the path of least action.}
\label{fig:path-least-action}
\end{figure}
To do this, let us look for curves (like the solid line in
Fig.~\ref{fig:path-least-action}) that are \textbf{critical points}
of $S$, namely:
\begin{equation}
\left. \frac{d}{ds}S(q_s)\right|_{s=0} = 0
\end{equation}
where
\[
q_s = q + s\delta q
\]
for all $\delta q \colon [t_0,t_1]\rightarrow\mathbb{R}^n$ with
\[
\delta q(t_0)=\delta q(t_1)=0.
\]
To show that
\begin{equation}
F=ma \quad \Leftrightarrow \quad
\left.\frac{d}{ds} S(q_s)\right|_{s=0}=0 \;
\textrm{for all} \; \delta q\colon [t_0,t_1]\rightarrow\mathbb{R}^n
\; \textrm{with}\; \delta q(t_0)=\delta q(t_1)=0
\end{equation}
we start by using the definition of the action and the chain rule:
\[
\begin{split}
\left.\frac{d}{ds}S(q_s)\right|_{s=0} &= \frac{d}{ds}\left.\int_{t_0}^{t_1}
\frac{1}{2}m\dot{q}_s(t)\cdot\dot{q}_s(t)-V(q_s(t))\,dt\right|_{s=0} \\ \\
&= \left.\int_{t_0}^{t_1}\frac{d}{ds}\left[
\frac{1}{2}m\dot{q}_s(t) \cdot \dot{q}_s(t) -
V(q_s(t))\right]dt\right|_{s=0} \\ \\
&= \left.\int_{t_0}^{t_1}\left[
m\dot{q}_s \cdot \frac{d}{ds}\dot{q}_s(t)- \grad V(q_s(t)) \cdot \frac{d}{ds}q_s(t)\right]dt\right|_{s=0} \\
\end{split}
\]
Next note that
\[ \frac{d}{ds} q_s (t) = \delta q(t) \]
so
\[ \frac{d}{ds} \dot{q}_s (t) = \frac{d}{ds} \frac{d}{dt} {q}_s (t)
= \frac{d}{dt} \frac{d}{ds} {q}_s (t)
= \frac{d}{dt} \delta{q}(t) .\]
Thus we have
\[
\begin{split}
\left.\frac{d}{ds}S(q_s)\right|_{s=0} &=
\int_{t_0}^{t_1}\left[ m\dot{q} \cdot \frac{d}{dt} \delta q(t) -
\grad V(q(t)) \cdot \delta q(t)\right]dt .
\end{split}
\]
Next we can integrate by parts, noting the boundary terms vanish
because $\delta q=0$ at $t_1$ and $t_0$:
\[
\left.\frac{d}{ds}S(q_s)\right|_{s=0}
= \int_{t_0}^{t_1}\left[ -m\ddot{q}(t)-\grad V(q(t))\right]
\cdot \delta q(t)\, dt .
\]
It follows that variation in the action is zero for \emph{all}
variations $\delta q$ iff the term in brackets is identically
zero, that is,
\[
-m\ddot{q}(t)-\grad V(q(t)) = 0.
\]
So, the path $q$ is a critical point of the action $S$ iff
\[
F=ma.
\]
The above result applies only for conservative forces, i.e., forces
that can be written as minus the gradient of some potential. This is
not true for all forces in nature: for example, the force on a charged
particle in a magnetic field depends on its velocity as well as its position.
However, when we develop the Lagrangian approach further we will see that
it applies to this force as well!
%%
%%
\subsection{Lagrangian versus Hamiltonian Approaches}
I am not sure where to mention this, but before launching into the
history of the Lagrangian approach may be as good a
time as any. In later chapters we will describe another approach
to classical mechanics: the Hamiltonian approach. Why do we need
two approaches, Lagrangian and Hamiltonian?
They both have their own advantages. In
the simplest terms, the Hamiltonian approach focuses on \emph{position
and momentum}, while the Lagrangian approach focuses on \emph{position
and velocity}. The Hamiltonian approach focuses on \emph{energy},
which is a function of position and momentum --- indeed, `Hamiltonian'
is just a fancy word for energy. The Lagrangian approach focuses on
the \emph{Lagrangian}, which is a function of position and velocity.
Our first task in understanding Lagrangian mechanics is to get a gut
feeling for what the Lagrangian means. The key is to understand
the integral of the Lagrangian over time --- the `action', $S$. We shall
see that this describes the `total amount that happened' from one
moment to another as a particle traces out a path. And, peeking
ahead to quantum mechanics, the quantity $\exp(iS/\hbar)$, where
$\hbar$ is Planck's constant, will describe the `change in phase'
of a \emph{quantum} system as it traces out this path.
In short, while the Lagrangian approach takes a while
to get used to, it provides invaluable insights into classical
mechanics and its relation to quantum mechanics. We shall see
this in more detail soon.
%%
%%
%%
\section{Prehistory of the Lagrangian Approach}
We've seen that a particle going from point $a$ at time $t_0$ to a
point $b$ at time $t_1$ follows a path that is a critical point of the
action,
\[
S = \int_{t_0}^{t_1} K - V\,dt
\]
so that slight changes in its path do not change the action (to first
order). Often, though not always, the action is minimized, so this is
called the \textbf{Principle of Least Action}.
Suppose we did not have the hindsight afforded by the Newtonian
picture. Then we might ask, ``Why does nature like to minimize the
action? And why \emph{this} action $\int K-V\,dt$? Why not some
other action?''
`Why' questions are always tough. Indeed, some people say that
scientists should never ask `why'. This seems too extreme: a more
reasonable attitude is that we should only ask a `why' question if we
expect to learn something scientifically interesting in our attempt to
answer it.
There are certainly some interesting things to learn from the question
``why is action minimized?'' First, note that total energy is
conserved, so energy can slosh back and forth between kinetic and
potential forms. The Lagrangian $L = K - V$ is big when most of the
energy is in kinetic form, and small when most of the energy is in
potential form. Kinetic energy measures how much is `happening' ---
how much our system is moving around. Potential energy measures how
much \emph{could} happen, but isn't yet --- that's what the word
`potential' means. (Imagine a big rock sitting on top of a cliff,
with the potential to fall down.) So, the Lagrangian measures
something we could vaguely refer to as the `activity' or `liveliness'
of a system: the higher the kinetic energy the more lively the system,
the higher the potential energy the less lively. So, we're being told
that nature likes to minimize the total of `liveliness' over time: that
is, the total action.
In other words, nature is as lazy as possible!
For example, consider the path of a thrown rock in the Earth's
gravitational field, as in Fig.~\ref{fig:projectile-motion}.
\begin{figure}[ht]
\centering
\psfrag{K-V_small}{$K-V$ small here\ldots good!}
\psfrag{spend_time_here}{Spend as much time as possible here}
\psfrag{K-V_big}{$K-V$ big\ldots bad!}
\psfrag{get_done_quick}{Get this over with quick!}
\includegraphics[width=.33\textwidth]{images/projectile-motion}
%\includegraphics{images/projectile-motion}
\caption{A particle's ``lazy'' motion minimizes the action.}
\label{fig:projectile-motion}
\end{figure}
The rock traces out a parabola, and we can think of it as doing this
in order to minimize its action. On the one hand, it wants
to spend a lot of time near the top of its trajectory, since this is
where the kinetic energy is least and the potential energy is greatest.
On the other hand, if it spends \emph{too} much time near the top of its
trajectory, it will need to really rush to get up there and get back down,
and this will take a lot of action. The perfect compromise is a parabolic
path!
Here we are anthropomorphizing the rock by saying that it `wants' to
minimize its action. This is okay if we don't take it too seriously.
Indeed, one of the virtues of the Principle of Least Action is that it
lets us put ourselves in the position of some physical system and
imagine what we would do to minimize the action.
There is another way to make progress on understanding `why' action is
minimized: history. Historically there were two principles that were
fairly easy to deduce from observations of nature: (i)~the principle
of least time, used in optics, and (ii)~the principle of minimum
energy, used in statics. By putting these together, we can guess the
principle of least action. So, let us recall these earlier minimum
principles.
%%
%%
\subsection{The Principle of Least Time}
In 1662, Pierre de Fermat pointed out that light obeys the \textbf{principle
of least time}: when a ray of light goes from one point to another, it
chooses the path that takes the least time. It was known much earlier that
moving freely through the air light moves in straight lines, which in
Euclidean space are the shortest paths from one point to another. But
more interesting than straight lines are piecewise straight paths and
curves. Consider reflection of light from a mirror:
\begin{figure}[ht]
\centering
\psfrag{A}{\textsf{A}} \psfrag{B}{\textsf{B}} \psfrag{C}{\textsf{C}}
\psfrag{C'}{\textsf{C}$'$}
\psfrag{q1}{$\theta_1$} \psfrag{q2}{$\theta_2$}
\includegraphics[width=.4\textwidth]{images/light-reflection}
\caption{Reflection of light from a mirror.}
\label{fig:light-reflection}
\end{figure}
What path does the light take? The empirical answer was known at
least since Euclid: it chooses \textsf{B} such that
$\theta_1=\theta_2$, angle of incidence equals the angle of
reflection. But Hero of Alexandria pointed out that this is precisely
the path that minimizes the length of the trajectory subject to the
condition that it must hit the mirror (at least at one point). In
fact light traveling from \textsf{A} to \textsf{C} takes
\emph{both} the reflected path \textsf{ABC} and the straight path \textsf{AC}. Why is
\textsf{ABC} the shortest path hitting the mirror? This follows
from some basic Euclidean geometry:
\[
\begin{split}
B\text{ minimizes }AB +BC &\Leftrightarrow B \text{ minimizes }
AB+BC' \\
&\Leftrightarrow A,B,C' \text{ lie on a line} \\
&\Leftrightarrow \theta_1=\theta_2.
\end{split}
\]
Note the introduction of the fictitious image \textsf{C}$'$ ``behind''
the mirror. A similar trick is now used in solving electrostatic
problems: a conducting surface can be replaced by fictitious mirror
image charges to satisfy the boundary conditions. (We also see this method
in geophysics when one has a geological fault, and in hydrodynamics when
there is a boundary between two media.)
However, the big clue came from refraction of light. Consider a ray of
light passing from one medium to another:
\begin{figure}[ht]
\centering
\psfrag{medium 1}{medium 1} \psfrag{medium 2}{medium 2}
\psfrag{q1}{$\theta_1$} \psfrag{q2}{$\theta_2$}
\includegraphics[width=.36\textwidth]{images/snells-law}
\caption{Refraction of light passing from one medium to another.}
\label{fig:snells-law}
\end{figure}
In 984 AD, the Persian scientist Ibn Sahl pointed out that each medium has
some number $n$ associated with it, called its \textbf{index of refraction},
such that
\[
n_1 \sin\theta_1 = n_2\sin\theta_2.
\]
This principle was rediscovered in the 1600s by the Dutch astronomer
Willebrord Snellius, and is usually called \textbf{Snell's law}. It
is fundamental to the design of lenses.
In 1662, Pierre de Fermat pointed out in a letter to a friend that
Snell's law would follow if the speed of light were proportional to
$1/n$ and light minimized the time it takes to get from \textsf{A} to
\textsf{C}. Note: in this case it is the \emph{time} that is
important, not the length of the path. But the same is true for the
law of reflection, since in that case the path of minimum length gives
the same results as the path of minimum time.
So, not only is light the fastest thing around, it's also always taking
the quickest path from here to there!
In fact, this idea seems to go back at least to 1021, when Ibn
al-Haytham, a scientist in Cairo, mentioned it in his
\textsl{Book of Optics}. But the French physicists who formulated the
principle of least action were much more likely to have been influenced
by Fermat.
Fermat's friend, Cureau de la Chambre, was unconvinced:
\begin{quote}
The principle which you take as the basis for your proof, namely that
Nature always acts by using the simplest and shortest paths, is merely
a moral, and not a physical one. It is not, and cannot be, the cause
of any effect in Nature.
\end{quote}
The same philosophical objection is often raised against the principle
of least action. That is part of what makes the principle so interesting:
how does nature ``know'' how to take the path of least action? The best
explanation so far involves quantum mechanics. But I am getting ahead
of myself here.
%%
%%
\subsection{The Principle of Minimum Energy}
Another principle foreshadowing the principle of least action was the
``principle of minimum energy''. Before physicists really got going in
studying dynamics they thought a lot about statics. \textbf{Dynamics} is
the study of moving objects, while \textbf{statics} is the study of
objects at rest, or in equilibrium.
\begin{figure}[ht]
\centering
\psfrag{L_1}{$L_1$} \psfrag{L_2}{$L_2$}
\psfrag{m_1}{$m_1$} \psfrag{m_2}{$m_2$}
\includegraphics[width=.33\textwidth]{images/lever-statics}
\caption{A principle of energy minimization determines a lever's balance.}
\label{fig:lever-statics}
\end{figure}
For example, Archimedes studied the laws of a see-saw or lever
(Fig.~\ref{fig:lever-statics}), and he found that this would be in
equilibrium if
\[
m_1 L_1 = m_2 L_2.
\]
This can be understood using the ``principle of virtual work'', which was
formalized quite nicely by Johann Bernoulli in 1715. Consider
moving the lever slightly, i.e., infinitesimally, as in Fig.~\ref{fig:lever-perturb}.
\begin{figure}[ht]
\centering
\psfrag{dq}{$d\theta$}
\psfrag{dq_1}{$dq_1$}
\psfrag{dq_2}{$dq_2$}
\includegraphics[width=.33\textwidth]{images/lever-perturb}
\caption{An infinitesimal rotation $d\theta$ of the lever displaces the masses by $dq_1$ and $dq_2$.}
\label{fig:lever-perturb}
\end{figure}
In equilibrium, the infinitesimal work done by this motion is zero! The
reason is that the work done on the $i^\text{th}$ body is
\[
dW_i = F_i dq_i
\]
and gravity pulls down with a force $m_ig$, so
\[
\begin{split}
dW_1 &= (0,0,-m_1g)\cdot(0,0,-L_1d\theta) \\
&=m_1gL_1\,d\theta
\end{split}
\]
and similarly
\[
dW_2 = -m_2gL_2\,d\theta.
\]
The total ``virtual work'' $dW=dW_1+dW_2$ vanishes for all $d\theta$
(that is, for all possible infinitesimal motions) precisely when
\[
m_1L_1 - m_2L_2 =0
\]
which is just as Archimedes wrote.
%%
%%
\subsection{Virtual Work}
Let's go over the above analysis in more detail. I'll try to make it
clear what we mean by \textbf{virtual work}.
The forces and constraints on a system may be time dependent. So
equal infinitesimal displacements of the system might result in
the forces $\mathbf{F}_{i}$ acting on the system doing different
amounts of work at different times. To \emph{displace a system by
$\delta \mathbf{r}_{i}$ for each position coordinate, and yet remain
consistent with all the constraints and forces at a given instant of
time $t$}, without any time interval passing, is called a \emph{virtual
displacement}. It's called `virtual' because it cannot be realized: any
actual displacement would occur over a finite time interval, during
which the forces and constraints might change. Now
call the work done by this hypothetical virtual displacement,
$\mathbf{F}_{i}\boldsymbol{\cdot} \delta \mathbf{r}_{i}$, the
\textbf{virtual work}. Consider a system in the special state of being
in equilibrium, i.e., when $\sum \textbf{F}_{i}=0$. Then because by
definition the virtual displacements do not change the forces, we must
deduce that the virtual work vanishes for a system in equilibrium,
\begin{equation}
\sum_{i} \mathbf{F}_{i} \boldsymbol{\cdot} \delta \mathbf{r}_{i} =0 \qquad \text{(when in equilibrium)}.
\end{equation}
Note that in the above example we have two particles in $\mathbb{R}^3$
subject to a constraint (they are pinned to the lever arm). However,
a number $n$ of particles in $\mathbb{R}^3$ can be treated as a single
quasi-particle in $\mathbb{R}^{3n}$, and if there are constraints it
can move in some submanifold of $\mathbb{R}^{3n}$. So ultimately we
need to study a particle on an arbitrary \emph{manifold}. But, we'll
postpone such sophistication for a while.
For a particle in $\mathbb{R}^n$, the principle of virtual work says
\begin{align*}
q(t)&=q_0 \quad\text{satisfies}\quad F=ma, &\quad &\text{(it's in equilibrium)} \\
& \Updownarrow &&\\
dW&=F\cdot dq\quad\text{vanishes for all}\quad dq\in\mathbb{R}^n,
&\quad &\text{(virtual work is zero for $\delta q\rightarrow 0$)}\\
&\Updownarrow &&\\
F&=0, &\quad&\text{(no force on it!)}
\end{align*}
If the force is conservative ($F=-\grad V$) then this is also equivalent to,
\[
\grad V(q_0) = 0
\]
that is, we have equilibrium at a critical point of the potential.
The equilibrium will be \textbf{stable} if $q_0$ is a local minimum of
the potential $V$.
\begin{figure}[ht]
\centering
\psfrag{V}{$V$} \psfrag{Rn}{$\mathbb{R}^n$}
\psfrag{unstable_equilibrium}{unstable equilibrium}
\psfrag{stable_equilibrium}{stable equilibrium}
\includegraphics[width=.5\textwidth]{images/local-equilibria}
\caption{Stable and unstable equilibria occur at critical points of the potential $V$.}
\label{fig:local-equilibria}
\end{figure}
We can summarize all the above by proclaiming that we have a ``principle of least energy'' governing stable equilibria. We also have an analogy between statics and dynamics:
\begin{center}
\begin{tabular}{l|l}
\gapbelow Statics & Dynamics \\ \hline
\gapabove equilibrium, $F=0$ & $F=ma$ \\
potential, $V$ & action, $S=\dint_{t_0}^{t_1} K-V\,dt$ \\
critical points of $V$ & critical points of $S$
\end{tabular}
\end{center}
%%
%%
\subsection{From Virtual Work to the Principle of Least Action}
Sometimes laws of physics are just guessed using
a bit of intuition and a gut feeling that nature must be beautiful or
elegantly simple (though occasionally awesomely complex in beauty).
One way to make good guesses is to \emph{generalize}.
The principle of virtual work for statics says that equilibrium occurs when
\[
F(q_0)\cdot\delta q =0,\qquad \forall \delta q\in\mathbb{R}^n.
\]
Around 1743, D'Alembert generalized this principle to dynamics
in his \textsl{Trait\'e de dynamique}. He did it by inventing what he called
the ``inertia force'', $-m\,a$, and postulating that in dynamics
equilibrium occurs when the so-called \textbf{total force}, $F - ma$,
vanishes. Of course this is just a restatement of Newton's law
$F = ma$. But this allowed him to generalize the principle of virtual work
from statics to dynamics. Namely, a particle will trace out a path
$q \colon [t_0,t_1] \to \mathbb{R}^n$ obeying
\begin{equation}
\label{eq:inertia-force}
\left[F(q(t))-ma(t)\right]\cdot\delta q(t) = 0
\end{equation}
for all $\delta q \colon [t_0,t_1] \to \mathbb{R}^n$ with
\[
\delta q(t_0) = \delta q(t_1) = 0.
\]
Let us see how this principle implies the principle of least action.
We create a family of paths parameterized by $s$ in the usual way
\[
q_s(t) = q(t)+s\,\delta q(t)
\]
and define the \textbf{variational derivative} of any function
$f$ on the space of paths by
\begin{equation}
\delta f(q) = \frac{d}{ds}f(q_s)\Bigr.\Bigl|_{s=0} .
\end{equation}
Then D'Alembert's generalized principle of virtual work implies
\[
\int_{t_0}^{t_1} \left[F(q(t))-m\ddot{q}(t)\right] \cdot\delta q(t)\, dt = 0
\]
for all $\delta q$, so if $F=-\grad V$ we have
\begin{align*}
0 &= \int_{t_0}^{t_1}
\left[-\grad V(q(t))-m\ddot{q}(t)\right]\cdot\delta q(t)\,dt \\
&= \int_{t_0}^{t_1}\left[-\grad V(q(t))\cdot\delta q(t)
+ m\qdot(t) \cdot \delta\qdot(t)\right]\,dt
\end{align*}
where in the second step we did an integration by parts, which has no
boundary terms since $\delta q(t_0) = \delta q(t_1) = 0$.
Next, using
\[
\begin{array}{ccccc}
\delta V(q(t))
&= \displaystyle{ \left.\frac{d}{ds}V(q_s(t))\right|_{s=0} }
&= \displaystyle{ \grad V(q) \cdot \left.\frac{dq_s(t)}{ds}\right|_{s=0} }
&= \grad V(q(t)) \cdot \delta q(t)
\end{array}
\]
and
\[ \delta(\qdot(t)^2) = 2 \qdot(t) \cdot \delta \qdot(t) \]
we obtain
\begin{align*}
0 &= \int_{t_0}^{t_1}\left[-\grad V(q(t))\cdot\delta q(t)
+ m\dot{q}(t) \cdot \delta\dot{q}(t)\right]\,dt \\
&= \int_{t_0}^{t_1} \left[- \delta V(q(t)) + \frac{m}{2} \delta
(\qdot(t)^2) \right] \,dt \\
&= \delta\biggl(\int_{t_0}^{t_1} \left[-V(q(t)) +
\frac{m}{2}\qdot(t)^2\right] \,dt \biggr) \\
&= \delta \biggl(\int_{t_0}^{t_1} \left[ K(t) - V(q(t)) \right]\,dt \biggr)
\end{align*}
and thus
\[
\delta S(q) = 0
\]
where
\[
S(q) = \int_{t_0}^{t_1}\left[ K(t) -V(q(t))\right] \,dt
\]
is the action of the path $q$.
In fact, Joseph-Louis Lagrange presented a calculation of this general sort
in 1768, and this idea underlies his classic text \textsl{M\'ecanique
Analitique}, which appeared 20 years later. This is why $K - V$ is called
the Lagrangian.
I hope you now see that the principle of least action is a natural
generalization of the principle of minimum energy
from statics to dynamics. Still, there's something unsatisfying about
the treatment so far. I did not really explain why one must
introduce the ``inertia force''---except, of course, that we need it
to obtain agreement with Newton's $F = ma$.
We conclude with a few more words about this mystery. Recall from
undergraduate physics that in an accelerating coordinate system there
is a fictional force $-ma$, which is called the \textbf{centrifugal force}.
We use it, for example, to analyze simple physics in a rotating reference
frame. If you are \emph{inside} the rotating system and you throw a
ball straight ahead it will appear to curve away from your target, and
if you did not know that you were rotating relative to the rest of the
universe then you'd think there was a force on the ball equal to the
centrifugal force. If you are inside a big rapidly rotating drum then
you'll also feel pinned to the walls. This is an example of an
inertia force which comes from using a funny coordinate system. In fact,
in general relativity one sees that---in a certain sense---\emph{gravity}
is an inertia force! But more about this later.
%%%%%%%%%%%%%%%%%%%%%CHAPTER 2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%%
%%
\chapter{Lagrangian Mechanics}
\label{ch:Equations_of_Motion}
%\noindent (Week 2, April 4, 6, 8.)\vspace{2ex}
In this chapter we'll look at Lagrangian mechanics in more generality,
and show the principle of least action is equivalent to some equations
called the Euler--Lagrange equations.
%%
%%
%%
\section{The Euler--Lagrange Equations}
We are going to start thinking of a general classical system as a set of points in an abstract \emph{configuration space} or phase space\footnote{The tangent bundle $TQ$ will be referred to as \emph{configuration space}, later on when we get to the chapter on Hamiltonian mechanics we'll find a use for the cotangent bundle $T^\ast Q$, and normally we call this the \emph{phase space}.}. So consider an arbitrary classical system as living in a space of points in some manifold $Q$. For example, the space for a spherical double pendulum would look like Fig.~\ref{fig:double-pendulum-space}, where $Q=S^2\times S^2$.
\begin{figure}[ht]
\centering
\psfrag{Q=S2xS2}{$Q=S^2\times S^2$}
\includegraphics[width=.33\textwidth]{images/double-pendulum-space}
\caption{Double pendulum configuration space.}
\label{fig:double-pendulum-space}
\end{figure}
So our system is ``a particle in $Q$'', which means you have to
disabuse yourself of the notion that we're dealing with real
particles: what we're really dealing with is a \emph{single
abstract particle} in an abstract higher dimensional space. This single
abstract particle represents two real particles if we are talking about
the classical system in Fig.~\ref{fig:double-pendulum-space}.
Sometimes to make this clear we'll talk about ``the system taking a
path'', instead of ``the particle taking a path''. It is then clear
that when we say, ``the system follows a path $q(t)$'' that we're
referring to the point $q$ in configuration space $Q$ that represents
all of the particles in the real system.
So as time passes, ``the system'' traces out a path
\[
q \colon [t_0,t_1]\longrightarrow Q
\]
and we define its \textbf{velocity}
\[
\dot{q}(t)\in T_{q(t)}Q
\]
to be the tangent vector at $q(t)$ given by the equivalence class
$[\sigma]$ of curves through $q(t)$ with derivatives
$\dot{\sigma}(t)=dq(s)/ds|_{s=t}$. We'll just write it as
$\dot{q}(t)$.
Let $\Gamma$ be the space of smooth paths from $a\in Q$ to $b\in Q$,
\[
\Gamma = \{q \colon [t_0,t_1]\rightarrow Q \mid q(t_0)=a, q(t_1)=b\}
\]
($\Gamma$ is an infinite dimensional manifold, but we won't go into
that for now.) Let the \textbf{Lagrangian} for the system be
\emph{any} smooth function of position and velocity:
\[
L: TQ\longrightarrow \mathbb{R}
\]
and define the \textbf{action}
\[
S \colon \Gamma \longrightarrow \mathbb{R}
\]
by
\begin{equation}
S(q) = \int_{t_0}^{t_1} L(q,\dot{q})\,dt
\end{equation}
The path that our abstract particle will actually take is a critical point of $S$. In other words, it will choose a path $q\in\Gamma$ such that for any smooth 1-parameter family of paths $q_s\in\Gamma$ with $q_0=q$, we have
\begin{equation}
\label{eq:leastaction}
\frac{d}{ds}S(q_s)\Bigl.\Bigr|_{s=0} = 0.
\end{equation}
For any function $f$ on the space of paths we define
its \textbf{variational derivative} by
\[
\delta f(q) =
\frac{d}{ds}f(q_s)\Bigl.\Bigr|_{s=0}
\]
so that Eq.\ \eqref{eq:leastaction} can be rewritten simply as
\begin{equation}
\delta S(q) = 0.
\end{equation}
What is a ``1-parameter family of paths''? It is nothing more nor less than a set of well-defined paths $\{q_s\}$, each one labeled by a parameter $s$. For a ``smooth'' 1-parameter family of paths, $q_s(t)$ depends smoothly on both
$s$ and $t$.
\begin{figure}[ht]
\centering
\psfrag{q0}{$q_0$} \psfrag{q_s}{$q_s$}
\includegraphics[width=.15\textwidth]{images/1-param-family}
\caption{Schematic of a 1-parameter family of curves.}
\label{fig:1-param-family}
\end{figure}
Thus, in Fig.~\ref{fig:1-param-family} we can go from $q_0$ to $q_s$ by smoothly varying the parameter from $0$ to a given value $s$.
Since $Q$ is a manifold, it admits a covering by coordinate charts. For now, let's pick coordinates in a neighborhood $U$ of some point $q(t)\in Q$. Next, consider only variations $q_s$ such that $q_s=q$ outside $U$. A cartoon of this looks like Fig.~\ref{fig:local-path-variation}.
\begin{figure}[ht]
\centering
\psfrag{Q}{$Q$} \psfrag{U}{$U$} \psfrag{a}{$a$} \psfrag{b}{$b$}
\includegraphics[width=.5\textwidth]{images/local-path-variation}
\caption{Local path variation.}
\label{fig:local-path-variation}
\end{figure}
Then we restrict attention to a subinterval $[t_0',t_1']\subseteq
[t_0,t_1]$ such that $q_s(t)\in U$ for $t_0'\leq t\leq t_1'$.
Let's just go ahead and rename $t_0'$ and $t_1'$ as ``$t_0$ and $t_1$'' to drop the primes. We can use the coordinate charts on $U$,
\begin{align*}
\varphi \colon U &\longrightarrow \mathbb{R}^n \\
x & \longmapsto \varphi(x)=(x^1,x^2,\ldots,x^n)
\end{align*}
and we also have coordinates for the tangent vectors
\begin{align*}
d\varphi \colon TU &\longrightarrow T\mathbb{R}^n
\cong \mathbb{R}^n\times\mathbb{R}^n \\
(x,y) & \longmapsto d\varphi(x,y)=(x^1,\ldots,x^n,y^1,\ldots,y^n)
\end{align*}
where $y\in T_xQ$. We can restrict $L \colon
TQ \to \mathbb{R}$ to $TU\subseteq TQ$, and then we can describe
$L$ using the coordinates $x^i,y^i$ on $TU$. The $x^i$ are
\emph{position} coordinates, while the $y^i$ are the associated
\emph{velocity} coordinates. Using these coordinates we
have
\begin{align*}
\delta S &= \delta \int_{t_0}^{t_1} L(q(t),\dot{q}(t))\,dt \\
&= \int_{t_0}^{t_1} \delta L(q,\dot{q})\,dt \\
&= \int_{t_0}^{t_1} \Bigl(\frac{\pa L}{\pa x^i}\delta q^i
+ \frac{\pa L}{\pa y^i}\delta\dot{q}^i \Bigr) \,dt
\end{align*}
where we've used the smoothness of $L$ and the Einstein summation
convention for repeated indices $i$. Note that we can write $\delta
L$ as above using a local coordinate patch because the path variations
$\delta q$ are entirely trivial outside the patch for $U$.
Continuing, using the Leibniz rule
\[
\frac{d}{dt}\Bigl(\frac{\pa L}{\pa y}\delta q\Bigr)
= \frac{d}{dt}\frac{\pa L}{\pa y}\delta q
+ \frac{\pa L}{\pa y}\delta \dot{q}
\]
we have
\begin{align*}
\delta S &= \int_{t_0}^{t_1} \Bigl(\frac{\pa L}{\pa x^i}
- \frac{d}{dt}\frac{\pa L}{\pa y^i}\Bigr)\delta q^i(t)\,dt \\
&= 0.
\end{align*}
Here the boundary terms vanish because we deliberately chose $\delta q$ that vanish at the endpoints $t_0$ and $t_1$ inside $U$. If this integral is to vanish as demanded by $\delta S=0$, then it must vanish for all path variations $\delta q$. That means the term in brackets must be identically zero, or
\begin{equation}
\frac{d}{dt}\frac{\pa L}{\pa y^i} - \frac{\pa L}{\pa x^i} = 0
\end{equation}
This is necessary to get $\delta S=0$, for all $\delta q$, but in fact it's also sufficient. Physicists always give the coordinates $x^i$, $y^i$ on $TU$ the names ``$q^i$'' and ``$\dot{q}^i$'', despite the fact that these symbols also have another meaning, namely the $x^i$ and $y^i$ coordinates of the point
\[
\bigl(q(t),\dot{q}(t)\bigr) \in TU.
\]
Thus, physicists write
\[
\boxed{
\gapleft \gapabove
\frac{d}{dt}\frac{\pa L}{\pa \dot{q}^i} = \frac{\pa L}{\pa q^i}
\gapright \gapbelow
}
\]
and they call these the \textbf{Euler--Lagrange equations}.
Our derivation of these equations was fairly abstract: we used
the terms ``position'' and ``velocity'', but we did not
assume these were the usual notions of position and velocity for a particle
in $\mathbb{R}^3$, or even $\mathbb{R}^n$. So, to bring things down to earth,
consider the good old familiar case where the configuration space $Q$ is $\mathbb{R}^n$ and the Lagrangian is
\[ L(q,\dot{q}) = \frac{1}{2}m\dot{q}\vdot\dot{q} - V(q) \]
In this case
\[ \frac{\partial L}{\partial q^i} = -\frac{\partial V}{\partial q^i} = F_i \]
are the components of the force on the particle, while
\[ \frac{\partial L}{\partial \qdot^i} = m \qdot^i \]
are the components of its mass times its velocity, also known as its
\textbf{momentum}. In physics momentum is denoted by $p$ for some obscure
reason, so we say
\[ \frac{\partial L}{\partial \qdot^i} = p_i \]
and the Euler--Lagrange equations say simply
\[ \frac{dp}{dt} = F .\]
The time derivative of momentum is force! Since $dp/dt$ is also mass times
acceleration, this is just another way of stating Newton's law
\[ F = ma .\]
Based on this example, we can make up nice names for the quantities in
the Euler--Lagrange equation in general, for any Lagrangian $L \colon
TQ \to \mathbb{R}$. We define
\[ F_i = \frac{\partial L}{\partial q^i} \]
and call this quantity the \textbf{force}, and we define
\[ p_i = \frac{\partial L}{\partial \qdot^i} \]
and call this quantity the \textbf{momentum}. The Euler--Lagrange equations
then say
\[ \frac{dp_i}{dt} = F_i .\]
Written this way, the general Euler--Lagrange equations are revealed to
be a generalization of Newton's law, with $ma$ replaced by the time derivative
of momentum.
\begin{table}[t]
\centering
%\caption{My caption}
%\label{my-label}
\begin{tabular}{ccc}
\textbf{Term} & \textbf{Meaning for a particle in a potential} &
\textbf{Meaning in general} \\ \cline{1-3}
& & \\
$\displaystyle{\frac{\partial L}{\partial \dot{q}^i}}$
& $mv_i$ & momentum: $p_i$ \\
& & \\
$\displaystyle{\frac{\partial L}{\partial q^i}}$
& $\displaystyle{-\frac{\partial V}{\partial q^i}}$ & force: $F_i$
\end{tabular}
\end{table}
\section{Noether's Theorem}
\label{chap:NoethersTheorem}
If the form of a system of dynamical equations does not change under
spatial translations then the momentum is a conserved quantity. When
the form of the equations is similarly invariant under time
translations then the total energy is a conserved quantity (a constant
of the equations of motion). Time and space translations are examples
of 1-parameter groups of transformations. \emph{Invariance under a
group of transformations} is precisely what we mean by a symmetry in
group theory. So symmetries of a dynamical system give conserved
quantities or conservation laws. The rigorous statement of all this
is the content of \emph{Noether's theorem}.
%%
%%
%%
\subsection{Time Translation}
To handle time translations we need to replace our paths
$q \colon [t_0,t_1]\rightarrow Q$ by paths $q:\mathbb{R}\rightarrow Q$, and
then define a new space of paths,
\[
\Gamma = \{q:\mathbb{R}\rightarrow Q\}.
\]
The bad news is that the action
\[
S(q) = \int_{-\infty}^{\infty}L\Bigl(q(t),\dot{q}(t)\Bigr)\,dt
\]
typically will not converge, so $S$ is then no longer a function on the space of paths. Nevertheless, if $\delta q=0$ outside of some finite interval, then the functional variation,
\[
\delta S \defeq \int_{-\infty}^{\infty} \left.\frac{d}{ds}
L\Bigl(q_s(t),\dot{q}_s(t)\Bigr)\right|_{s=0}\, dt
\]
\emph{will} converge, since the integrand is smooth and vanishes outside this
interval. Moreover, demanding that this $\delta S$ vanishes for all such
variations $\delta q$ is enough to imply the Euler--Lagrange equations:
\begin{align*}
\delta S &=
\int_{-\infty}^{\infty}\left.\frac{d}{ds}
L\Bigl( q_s(t),\dot{q}_s(t)\Bigr)\right|_{s=0}\, dt \\
&= \int_{-\infty}^{\infty}\left(\frac{\pa L}{\pa q_i}\delta q_i
+ \frac{\pa L}{\pa\dot{q}_i}\delta\dot{q}_i\right)\,dt \\
&= \int_{-\infty}^{\infty}\left(\frac{\pa L}{\pa q_i}
- \frac{d}{dt}\frac{\pa L}{\pa\dot{q}_i}\right)\delta q_i\,dt
\end{align*}
where again the boundary terms have vanished since $\delta q=0$ near
$t=\pm\infty$. To be explicit, the first term in
\[
\frac{\pa L}{\pa\qdot^i}\delta\qdot^i = \frac{d}{dt}\left(\frac{\pa L}{\pa\qdot^i}\delta q^i\right) - \left(\frac{d}{dt}\frac{\pa L}{\pa\qdot^i}\right)\delta q^i
\]
vanishes when we integrate. Then the whole thing vanishes for all compactly supported smooth $\delta q$ iff
\[
\frac{d}{dt}\frac{\pa L}{\pa\dot{q}_i} = \frac{\pa L}{\pa q_i}.
\]
So, we get Euler--Lagrange equations again.
%% WE MAY WANT THIS SOMEWHERE!!!
%%
%\subsection{Canonical and Generalized Coordinates}
%In light of this noted similarity with the Hamilton equations of motion, let's spend a few moments clearing up some terminology (I hate using jargon, but sometimes it's unavoidable, and sometimes it can be efficient---provided everyone is clued in).
%%
%%
%\subsubsection{Generalized Coordinates}
%For Lagrangian mechanics we have been the so-called \textbf{generalized coordinates} $q_i$ and $\dot{q}_i$. The $q_i$ are generalized positions, and the $\dot{q}_i$ are generalized velocities. The full set of independent generalized coordinates represent the degrees of freedom of a particle, or system of particles. So if we have $N$ particles in 3-dimensional space, we would typically have $6N$ generalized coordinates (where the ``6'' comes from 3 coordinates of position and 3 of velocity). These can be in any reference frame or system of axes, so for example, in a Cartesian frame, with two particles, in 3d space we'd have the $2\times 3=6$ position coordinates, and $2\times 3=6$ velocities,
%\[
% \{x_1,y_1,z_1,x_2,y_2,z_2\}, \{u_1,v_1,w_1,u_2,v_2,w_2\}
% \]
%where say $u=v_x$, $v=v_y$, $w=v_z$ are the Cartesian velocity components. This makes $12=6\times2=6N$ coordinates, matching the total degrees of freedom as claimed. If we constrain the particles to move in a plane (say place them on a table in a gravitational field) then we get $2N$ fewer degrees of freedom, and so $4N$ d.o.f.\ overall. By judicious choice of coordinate frame we can eliminate one velocity component and one position component for each particle.
%It is also handy to respect other symmetries of a system, maybe the particles move on a sphere for example, one can then define newpositions and momenta with a consequent reduction in the number ofthese generalized coordinates needed to describe the system.
%% WE MAY WANT THIS SOMEWHERE!!!
%%
%\subsubsection{Canonical Coordinates}
% In Hamiltonian mechanics (which we have not yet fully introduced) we will find it more useful to transform from generalized coordinates to canonical coordinates. The canonical coordinates are a special set of coordinates on the cotangent bundle of the configuration space manifold $Q$. They are usually written as a set of $(q^i,p_j)$ or $(x^i,p_j)$ with the $x$'s or $q$'s denoting the coordinates on the underlying manifold and the $p$'s denoting the conjugate momentum, which are 1-forms in the cotangent bundle at the point $q$ in the manifold.
% It turns out that the $q^i$ together with the $p_j$, form a coordinate system on the cotangent bundle $T^{\ast}Q$ of the configuration space $Q$, hence these coordinates are called the \define{canonical coordinates}.
% We will not discuss this here, but if you care to know, later on we'll see that the relation between the generalized coordinates and the canonical coordinates is given by the Hamilton-Jacobi equations for a system.
%%
%%
%%
\subsection{Symmetries}
First, let's give a useful definition that will make it easy to refer to a type
of dynamical system symmetry. We want to refer to symmetry transformations
(of the Lagrangian) governed by a single parameter.
\begin{definition}[one-parameter family of symmetries]
\label{def:1-parameter-family}
A 1-parameter family of symmetries of a Lagrangian system
$L:TQ\rightarrow\mathbb{R}$ is a smooth map,
\begin{align*}
F:\mathbb{R}\times\Gamma &\longrightarrow\Gamma \\
(s,q) & \longmapsto q_s,\qquad\text{with $q_0=q$}
\end{align*}
such that there exists a function $\ell:TQ\rightarrow\mathbb{R}$ for which
\[
\delta L = \frac{d \ell}{dt},
\]
that is,
\[
\left.\frac{d}{ds}L\Bigl(q_s(t),\dot{q}_s(t)\Bigr)\right|_{s=0}
= \frac{d}{dt}\ell\Bigl(q(t),\dot{q}(t)\Bigr)
\]
for all paths $q$.
\end{definition}
%%
\paragraph{Remark:}
The simplest case is $\delta L=0$, in which case we really have a way of moving
paths around ($q\mapsto q_s$) that doesn't change the Lagrangian---i.e., a
symmetry of $L$ in the most obvious way. But $\delta L = \frac{d}{dt}\ell$ is a
sneaky generalization whose usefulness will become clear.
%%
%%
\subsection{Noether's Theorem}
\label{sec:NoethersTheorem}
Here's a statement of the theorem. Note that $\ell$ in this theorem is the function associated with $F$ in Definition~\ref{def:1-parameter-family}.
\begin{theorem}[Noether's Theorem]
Suppose $F$ is a one-parameter family of symmetries of the Lagrangian system,
$L:TQ\rightarrow\mathbb{R}$. Then,
\[
p_i \delta q^i - \ell
\]
is conserved, that is, its time derivative is zero for any path $q\in\Gamma$
satisfying the Euler--Lagrange equations. In other words, in boring detail:
\[
\frac{d}{dt}\left[\frac{\pa L}{\pa y^i}\bigl(q(t),\dot{q}(t)\bigr)
\left.\frac{d}{ds}q_s^i(t)\right|_{s=0}
- \ell\bigl(q(t),\dot{q}(t)\bigr)\right] = 0
\]
\end{theorem}
\begin{proof}
\begin{align*}
\frac{d}{dt}\Bigl(p_i\delta q^i - \ell\Bigr) &= \dot{p}_i\delta q^i
+ p_i\delta\dot{q}^i - \frac{d}{dt}\ell \\
&= \frac{\pa L}{\pa q^i}\delta q^i
+\frac{\pa L}{\pa\dot{q}^i}\delta\dot{q}^i - \delta L \\
&= \delta L - \delta L = 0. \qedhere
\end{align*}
\end{proof}
``Okay, big deal'' you might say.
Before this can be of any use we'd need to find a
symmetry $F$. Then we'd need to find out what this $p_i\delta q^i-\ell$
business is that is conserved. So let's look at some examples.
%%
%%
\subsection{Conservation of Energy}
\begin{enumerate}
\item \textbf{Conservation of Energy.} (The most important example!)
All of our Lagrangian systems will have time translation invariance (because
the laws of physics do not change with time, at least not to any extent that we
can tell). So we have a one-parameter family of symmetries
\[
q_s(t) = q(t+s)
\]
This indeed gives,
\begin{align*}
\delta L &= \dot{L} \\
\intertext{for}
\left.\frac{d}{ds}L(q_s)\right|_{s=0} &= \frac{d}{dt}L = \dot{L}
\end{align*}
so here we take $\ell = L$ simply! We then get the conserved quantity
\[
p_i\delta q^i - \ell = p_i\dot{q}^i - L
\]
which we normally call the \define{energy}. For example, if $Q=\mathbb{R}^n$,
and if
\[
L = \frac{1}{2}m\dot{q}^2 - V(q)
\]
then this quantity is
\[
m\dot{q}\vdot\dot{q} - \Bigl(\frac{1}{2}m\dot{q}\vdot\dot{q}-V\Bigr)
= \frac{1}{2}m\dot{q}^2 + V(q).
\]
The term in parentheses is $K-V$, and the right-hand side is $K+V$.
\end{enumerate}
Let's repeat this example, this time with a specific Lagrangian. It doesn't
matter what the Lagrangian is, if it has 1-parameter families of symmetries
then it'll have conserved quantities, guaranteed. The trick in physics is to
write down a correct Lagrangian in the first place! (Something that will accurately describe the system of interest.)
%%
%%
%%
\section{Conserved Quantities from Symmetries}
We've seen that any 1-parameter family
\begin{align*}
F_s:\Gamma &\longrightarrow\Gamma \\
q&\longmapsto q_s
\end{align*}
which satisfies
\[
\delta L = \dot{\ell}
\]
for some function $\ell=\ell(q,\dot{q})$ gives a conserved quantity
\[
p_i\delta q^i - \ell.
\]
As usual we've defined
\[
\delta L = \left.\frac{d}{ds}L\Bigl(q_s(t),\dot{q}_s(t)\Bigr)\right|_{s=0}.
\]
Let's see how we arrive at a conserved quantity from a symmetry.
%%
%%
\subsection{Time Translation Symmetry}
For any Lagrangian system, $L:TQ\rightarrow\mathbb{R}$, we have a 1-parameter
family of symmetries
\[
q_s(t) = q(t+s)
\]
because
\[
\delta L = \dot{L}
\]
so we get a conserved quantity called the \define{total energy} or
\define{Hamiltonian},
\begin{equation}
H = p_i\dot{q}^i - L
\end{equation}
(You might prefer ``Hamiltonian'' to ``total energy'' because in general we
are not in the same configuration space as Newtonian mechanics; if you are doing
Newtonian mechanics then ``total energy'' is appropriate.)
For example: a particle on $\mathbb{R}^n$ in a potential $V$ has
$Q=\mathbb{R}^n$, $L(q,\dot{q})=\frac{1}{2}m\dot{q}^2-V(q)$. This system has
\[
p_i\dot{q}^i = \frac{\pa L}{\pa\dot{q}^i}\dot{q}^i = m\dot{q}^2 = 2K
\]
so
\[
H = p_i\dot{q}^i - L = 2K - (K - V) = K+V
\]
as you'd have hoped.
%%
%%
\subsection{Space Translation Symmetry}
For a free particle in $\mathbb{R}^n$, we have $Q=\mathbb{R}^n$ and
$L=K=\frac{1}{2}m\dot{q}^2$. This has \define{spatial translation} symmetries,
so that for any $v\in\mathbb{R}^n$ we have the symmetry
\[
q_s(t) = q(t) + s\, v
\]
with
\[
\delta L = 0
\]
because $\delta\dot{q}=0$ and $L$ depends only on $\dot{q}$ not on $q$
in this particular case. (Since $L$ does not depend upon $q^i$ we'll
call $q^i$ an \define{ignorable coordinate}; as above, these
ignorables always give symmetries, hence conserved quantities. It is
often useful therefore, to change coordinates so as to make some of
them ignorable if possible!)
In this example we get a conserved quantity called \define{momentum in the $v$
direction}:
\[
p_i\delta q^i = m\dot{q}_i v^i = m\dot{q}\vdot v.
\]
%%
\paragraph{Aside:} Note the subtle difference between two uses of the term
``momentum''; here it is a conserved quantity derived from space translation
invariance, but earlier it was a different thing, namely the momentum $\pa
L/\pa\dot{q}^i = p_i$ conjugate to $q^i$. These two different ``momenta''
happen to be the same in this example!
Since this is conserved for all $v$ we say that $m\dot{q}\in\mathbb{R}^n$ is
conserved. (In fact the whole Lie group $G=\mathbb{R}^n$ is acting as
a translation symmetry group, and we're getting a
$\mathfrak{g}(=\mathbb{R}^n)$-valued conserved quantity!)
%%
%%
\subsection{Rotational Symmetry}
The free particle in $\mathbb{R}^n$ also has rotation symmetry.
Consider any $X\in\mathfrak{so}(n)$ (that is a skew-symmetric $n\times n$
matrix), then for all $s\in\mathbb{R}$ the matrix $e^{sX}$ is in $SO(n)$,
that is, it describes a rotation. This gives a 1-parameter family of
symmetries
\[
q_s(t) = e^{sX} q(t)
\]
which has
\[
\delta L = \frac{\pa L}{\pa q^i}\delta q^i
+ \frac{\pa L}{\pa\dot{q}^i}\delta\dot{q}^i = m\dot{q}_i\delta\dot{q}^i.
\]
Now $q^i$ is ignorable, so $\pa L/\pa q^i = 0$, and $\pa L/\pa \dot{q}^i =
p_i$, and
\begin{align*}
\delta\dot{q}^i &= \left.\frac{d}{ds}\dot{q}_s^i\right|_{s=0} \\
&= \left.\frac{d}{ds}\frac{d}{dt}\Bigl(e^{sX}q\Bigr)\right|_{s=0} \\
&= \frac{d}{dt}X\, q \\
&= X\,\dot{q}.
\end{align*}
Thus,
\begin{align*}
\delta L &= m\dot{q}_i X^i_j \dot{q}^j \\
&= m\dot{q}\vdot(X\,\dot{q}) \\
&= 0
\end{align*}
since $X$ is skew symmetric as stated previously ($X\in\mathfrak{so}(n)$). So we get a conserved quantity, the \define{angular momentum} in the $X$ direction.
(Note: this whole bunch of math above for $\delta L$ just says that the
kinetic energy doesn't change when the velocity is \emph{rotated}, without
changing its magnitude.)
We write,
\[
p_i\delta q^i = m\dot{q}_i (X\,q)^i
\]
($\delta q = Xq$ just as $\delta\dot{q}=X\dot{q}$ in our previous calculation), or if $X$ has zero entries except in $ij$ and $ji$ positions, where it's $\pm 1$, then we get
\[
m(\dot{q}_i q^j - \dot{q}_j q^i)
\]
the ``$ij$ component of angular momentum''. If $n=3$ we can write these as
\[
m\dot{\vect{q}}\cross \vect{q}.
\]
Note that above we have assumed one can construct a basis for $\mathfrak{so}(n)$ using matrices of the form assumed for $X$, i.e., skew symmetric with $\pm 1$ in the $ij$ and $ji$ entries, respectively, and zero elsewhere.
I mentioned earlier that we can do mechanics with any Lagrangian, but if we want to be useful we'd better pick a Lagrangian that actually describes a real system. But how do we do that? All this theory is fine but is useless unless you know how to apply it. The above examples were for a particularly simple system, a free particle, for which the Lagrangian is just the kinetic energy, since there is no potential energy variation for a free particle. We'd like to know how to solve more complicated dynamics.
The general idea is to guess the kinetic energy and potential energy of the particle (as functions of your generalized positions and velocities) and then let,
\[
L = K - V
\]
So we are not using Lagrangians directly to tell us what the fundamental physical laws should be, instead we plug in some assumed physics and use the Lagrangian approach to solve the system of equations. If we like, we can then compare our answers with experiments, which indirectly tells us something about the physical laws---but only provided the Lagrangian formulation of mechanics is itself a valid procedure in the first place.
%%
%%
%%
\chapter{Examples}
% \noindent (Week 3, Apr. 11, 13, 15.)\vspace{2ex}
To see how Lagrangian mechanics and Noether's theorem
work in practice, let's do some problems.
The Lagrangian approach is often vastly superior to the
simplistic $F=ma$ formulation of mechanics. The Lagrangian
formulation allows the configuration space to be any manifold, and
allows us to easily use any coordinates we wish.
%%
%%
\section{The Atwood Machine}
Consider a frictionless pulley with two masses, $m_1$ and $m_2$, hanging from it:
\begin{figure}[ht]
\centering
\psfrag{x}{$x$} \psfrag{l-x}{$\ell-x$} \psfrag{m1}{$m_1$} \psfrag{m2}{$m_2$}
\includegraphics[width=.15\textwidth]{images/atwood-pulley}
%\caption{The Atwood machine.}
%\label{fig:atwood-pulley}
\end{figure}
\noindent We have
\begin{align*}
K &= \frac{1}{2}(m_1+m_2)\left(\frac{d}{dt}(\ell-x)\right)^2 =
\frac{1}{2}(m_1+m_2)\dot{x}^2 \\
V &= -m_1 g x - m_2 g(\ell-x) \\
\intertext{so}
L &= K-V = \frac{1}{2}(m_1+m_2)\dot{x}^2 + m_1gx+m_2g(\ell-x).
\end{align*}
The configuration space is $Q=(0,\ell)$, and $x\in(0,\ell)$ (we could use the ``owns'' symbol $\ni$ here and write $Q=(0,\ell) \ni x$ ). Moreover $TQ=(0,\ell)\times\mathbb{R}\ni(x,\dot{x})$.
As usual $L:TQ\rightarrow\mathbb{R}$. Note that solutions of the Euler--Lagrange equations will only be defined for \emph{some} times $t\in\mathbb{R}$, as eventually the solution reaches the ``edge'' of $Q$.
The momentum is:
\[
p = \frac{\pa L}{\pa\dot{x}} = (m_1+m_2)\dot{x}
\]
and the force is:
\[
F = \frac{\pa L}{\pa x} = (m_1-m_2)g
\]
The Euler--Lagrange equations say
\begin{align*}
\dot{p} &= F \\
(m_1+m_2)\ddot{x} &= (m_1-m_2)g \\
\ddot{x} &= \frac{m_1-m_2}{m_1+m_2}g
\end{align*}
So this is like a falling object in a downwards gravitational acceleration $a=\left(\frac{m_1-m_2}{m_1+m_2}\right)g$.
It is trivial to integrate the expression for $\ddot{x}$ twice (feeding in some appropriate initial conditions) to obtain the complete solution to the motion $x(t)$ and $\dot{x}(t)$. Note that $\ddot{x}=0$ when $m_1=m_2$, and $\ddot{x}=g$ if $m_2=0$.
%%
%%
\section{Bead on a Rotating Rod}
Now consider a bead of mass $m$ sliding in a frictionless way on a
rod rotating in a horizontal plane. The rod will go through the
origin $(0,0) \in \mathbb{R}^2$ and rotate at a constant angular
velocity $\omega$, so if the angle of the rod is $\theta(t)$ at time
$t$, we may as well assume
\[ \theta(t) = \omega t .\]
The bead's position on the rod will be given by a number $q(t) \in \mathbb{R}$
depending on time. What will the bead do?
Since the bead lies on a line, namely the rod, its configuration space is
$Q = \mathbb{R}$, and its Lagrangian is a function $L \colon TQ \to \mathbb{R}$
where $TQ = \mathbb{R} \times \mathbb{R}$. Its position and velocity thus
form a pair $(q(t), \qdot(t)) \in TQ$.
Since the rod lies in a horizontal plane, the bead's gravitational potential
energy is constant, and it doesn't affect the Euler--Lagrange equations to
assume this constant is zero. So, we take the bead's potential energy
to be
\[ V = 0 .\]
Its kinetic energy is
\[ K = \frac{1}{2} m v \cdot v \]
where $v$ is its velocity. But what is its velocity? Its position in the
plane at time $t$ is
\[ \begin{array}{ccl}
(x(t),y(t)) &=& (r(t) \cos \theta(t), r(t) \sin \theta(t)) \\
&=& (|q(t)| \cos (\omega t), |q(t)| \sin (\omega t)) .
\end{array}
\]
The time derivative of $|q(t)|$ is $\pm \dot{q}(t)$, with the plus sign if
$q(t) > 0$ and the minus sign if $q(t) < 0$. If $q(t) = 0$ we seem
to be in trouble, because the absolute value is not differentiable at
zero, but we'll see a way around this later. Ignoring this case for now,
the bead's velocity in the plane is thus
\[ \begin{array}{ccl}
v(t) &=& (\dot{x}(t), \dot{y}(t)) \\ \\
&=& \displaystyle{\frac{d}{dt}}
(|q(t)| \cos (\omega t), |q(t)| \sin (\omega t)) \\ \\
&=& (\pm \dot{q}(t) \cos (\omega t) - |q(t)| \omega \sin (\omega t),
\pm \dot{q}(t) \sin (\omega t) + |q(t)| \omega \cos (\omega t)) .
\end{array}
\]
so we have
\[ \begin{array}{ccl}
v(t) \cdot v(t) &=& \qdot^2 \cos^2(\omega t)
\mp 2 \omega |q| \qdot \sin(\omega t) \cos(\omega t)
+ \omega^2 q^2 \sin^2(\omega t) + \\
&& \qdot^2 \sin^2(\omega t)
\pm 2 \omega |q| \qdot \sin(\omega t) \cos(\omega t)
+ \omega^2 q^2 \cos^2(\omega t) \\
&=& \qdot^2 + \omega^2 q^2.
\end{array}
\]
Thus, the bead's kinetic energy is
\[ K = \frac{1}{2} m v \cdot v = \frac{1}{2} m \left(\qdot^2 + \omega^2 q^2\right) .\]
This has a simple interpretation: the first term is the `radial' part
of the kinetic energy, while the second term is the `angular' part.
The Lagrangian of the bead is
\[ L(q,\qdot) = K - V = \frac{1}{2} m \left(\qdot^2 + \omega^2 q^2\right) .\]
Note that this is perfectly well-defined and smooth at $q = 0$. Thus, our
problem at that point is easily dealt with: simply define the Lagrangian as
above. A more careful analysis shows this is reasonable.
The force on the bead is
\[ F = \frac{\partial L}{\partial q} = m \omega^2 q .\]
This is called \textbf{centrifugal force}, since it's caused by the rotating
rod and tends to pull the bead out. The bead's momentum is
\[ p = \frac{\partial L}{\partial \qdot} = m \qdot. \]
The Euler--Lagrange equation for the bead says
\[ \frac{dp}{dt} = F \]
or
\[ m \ddot{q}(t) = m \omega^2 q(t) \]
or simply
\[ \ddot q(t) = \omega^2 q(t) .\]
The mass of the bead does not affect its equation of motion. This equation
is easy to solve, too:
\[ q(t) = A e^{\omega t} + B e^{-\omega t} .\]
Thus, the bead is likely to shoot off to infinity as $t \to +\infty$, due
to the centrifugal force. If $\omega > 0$, the only exception is when
$A = 0$: in this case the bead moves ever closer to the origin, or else
just sits there if $A = B = 0$.
%%
%%
\section{Disk Pulled by Falling Mass}
Consider next a disk pulled across a table by a falling mass. The disk is free to move on a frictionless surface, and it can thus whirl around the hole through which the string tethering it to the mass below passes.
\begin{figure}[ht]
\centering
\psfrag{r}{$r$} \psfrag{l-r}{$\ell-r$} \psfrag{m1}{$m_1$} \psfrag{m2}{$m_2$}
\psfrag{no swinging}{no swinging allowed!}
\includegraphics[width=.33\textwidth]{images/disk-string-mass}
%\caption{Disk dragged by gravitationally falling mass on a string.}
%\label{fig:disk-string-mass}
\end{figure}
\begin{align*}
\text{Here }Q &=\text{ open disk of radius $\ell$, minus its center} \\
&= (0,\ell)\times S^1\,\ni (r,\theta)
\end{align*}
\[
TQ = (0,\ell)\times S^1\times\mathbb{R}\times\mathbb{R}
\,\ni (r,\theta,\dot{r},\dot{\theta})
\]
\begin{align*}
K &= \frac{1}{2}m_1(\dot{r}^2+r^2\dot{\theta}^2)
+ \frac{1}{2}m_2(\frac{d}{dt}(\ell-r))^2 \\
V &= gm_2(r-\ell) \\
L &= \frac{1}{2}m_1(\dot{r}^2+r^2\dot{\theta}^2) +\frac{1}{2}m_2\dot{r}^2
+gm_2(\ell-r)
\end{align*}
having noted that $\ell$ is constant so $d/dt(\ell-r)=-\dot{r}$. For the momenta we get,
\begin{align*}
p_r &= \frac{\pa L}{\pa\dot{r}} = (m_1+m_2)\dot{r} \\
p_\theta &= \frac{\pa L}{\pa\dot{\theta}} = m_1r^2\dot{\theta}.
\end{align*}
Note that $\theta$ is an ``ignorable coordinate''---it doesn't appear in $L$---so there's a symmetry, rotational symmetry, and $p_\theta$, the conjugate momentum, is conserved.
The forces are
\begin{align*}
F_r &= \frac{\pa L}{\pa r} = m_1 r\dot{\theta}^2 - gm_2 \\
F_\theta &= \frac{\pa L}{\pa\theta} = 0 \quad\text{($\theta$ is ignorable)}.
\end{align*}
Note: in $F_r$ the term $m_1 r\dot{\theta}^2$ is recognizable as a centrifugal force, pushing $m_1$ radially \emph{out}, while the term $-gm_2$ is gravity pulling $m_2$ down and thus pulling $m_1$ radially \emph{in}.
So, the Euler--Lagrange equations give
\begin{align*}
\dot{p}_r &= F_r, \quad (m_1+m_2)\ddot{r} = m_1r\dot{\theta}^2-m_2g \\
\dot{p}_\theta &=0, \quad p_\theta=m_1r^2\dot{\theta} = J =\text{ a constant}.
\end{align*}
Let's use our conservation law here to eliminate $\dot{\theta}$ from the first equation:
\begin{align*}
\dot{\theta} &= \frac{J}{m_1 r^2} \\
\intertext{so}
(m_1+m_2)\ddot{r} &= \frac{J^2}{m_1r^3}-m_2g
\end{align*}
Thus effectively we have a particle on $(0,\ell)$ of mass $m=m_1+m_2$ feeling a force
\[
F_r = \frac{J^2}{m_1 r^3} - m_2 g
\]
which could come from an ``effective potential'' $V(r)$ such that $dV/dr=-F_r$. So integrate $-F_r$ to find $V(r)$:
\[
V(r) = \frac{J^2}{2m_1r^2}+m_2 g r
\]
this is a sum of two terms that look like Fig.~\ref{fig:disk-string-mass_potential}
\begin{figure}[ht]
\centering
\psfrag{r}{$r$} \psfrag{r0}{$r_0$} \psfrag{V(r)}{$V(r)$}
\psfrag{attractive grav. potnl}{\ssz{attractive gravitational potential}}
\psfrag{repulsive centrifugal}{\ssz{repulsive centrifugal potential}}
\psfrag{no swinging}{no swinging allowed!}
\includegraphics[width=.5\textwidth]{images/disk-string-mass_potential}
\caption{Potential function for disk pulled by gravitating mass.}
\label{fig:disk-string-mass_potential}
\end{figure}
If $\dot{\theta}(t=0)=0$ then there is no centrifugal force and the disk will be pulled into the hole until it gets stuck. At that time the disk reaches the hole, which is topologically the center of the disk that has been removed from $Q$, so then we've hit the boundary of $Q$ and our solution is broken.
At $r=r_0$, the minimum of $V(r)$, our disc mass $m_1$ will be in a stable circular orbit of radius $r_0$ (which depends upon $J$). Otherwise we get orbits like Fig.~\ref{fig:disk-string-mass_orbits}.
\begin{figure}[ht]
\centering
\includegraphics[width=.33\textwidth]{images/disk-string-mass_orbits}
\caption{Orbits for the disc and gravitating mass system.}
\label{fig:disk-string-mass_orbits}
\end{figure}
%%
%%
\section{Free Particle in Special Relativity}
\label{ssec:RelativisticFreeParticle}
In relativistic dynamics the parameter coordinate that parametrizes the particle's path in Minkowski spacetime need not be the ``time coordinate'', indeed in special relativity there are many allowed time coordinates.
Minkowski spacetime is,
\[
\mathbb{R}^{n+1} \ni (x^0,x^1,\ldots,x^n)
\]
if space is $n$-dimensional. We normally take $x^0$ as ``time'', and $(x^1,\ldots,x^n)$ as ``space'', but of course this is all relative to one's reference frame. Someone else traveling at some high velocity relative to us will have to make a Lorentz transformation to translate from our coordinates to theirs.
This has a Lorentzian metric
\begin{align*}
g(v,w) &= v^0w^0-v^1w^1 -\ldots - v^nw^n \\
&=\eta_{\mu\nu}v^\mu w^\nu
\end{align*}
where
\[
\eta_{\mu\nu} = \left(
\begin{array}{ccccc}
1 & 0 & 0 & \ldots & 0 \\
0 & -1 & 0 & \ldots & 0 \\
0 & 0 & -1 & & 0 \\
\vdots & \vdots & & \ddots & \vdots \\
0 & 0 & 0 & \ldots & -1
\end{array}\right).
\]
In special relativity we take \emph{spacetime} to be the configuration space of a single point particle, so we let $Q$ be Minkowski spacetime, i.e., $\mathbb{R}^{n+1}\ni(x^0,\ldots,x^n)$ with the metric $\eta_{\mu\nu}$ defined above. Then the path of the particle is,
\[
q:\mathbb{R}(\ni t) \longrightarrow Q
\]
where $t$ is a completely arbitrary parameter for the path, not necessarily $x^0$, and not necessarily \emph{proper time} either. We want some Lagrangian $L:TQ\rightarrow\mathbb{R}$, i.e., $L(q^i,\dot{q}^i)$ such that the Euler--Lagrange equations will dictate how our free particle moves at a constant velocity. Many Lagrangians do this, but the ``best'' one should give an action that is \emph{independent} of the parameterization of the path---since the parameterization is ``unphysical'': it can't be measured. So the action
\[
S(q) = \int_{t_0}^{t_1} L\Bigl(q^i(t),\dot{q}^i(t)\Bigr)\,dt
\]
for $q\colon [t_0,t_1]\rightarrow Q$, should be independent of the choice of parameter $t$. The obvious candidate for $S$ is mass times arclength,
\[
S = m\int_{t_0}^{t_1} \sqrt{\eta_{ij}\dot{q}^i(t)\dot{q}^j(t)}\,dt
\]
or rather the Minkowski analogue of arclength, called \define{proper time}, at least when $\dot{q}$ is a timelike vector, i.e., $\eta_{ij}\dot{q}^i\dot{q}^j>0$, which says $\dot{q}$ points into the future (or past) lightcone and makes $S$ \emph{real}, in fact it's then the time ticked off by a clock moving along the path $q:[t_0,t_1]\rightarrow Q$.
\begin{figure}[ht]
\centering
\includegraphics[width=.25\textwidth]{images/lightcone}
%\caption{Lightcone at a point in spacetime, showing timelike, spacelike and null (lightlike) vectors.}
%\label{fig:lightcone}
\end{figure}
By ``obvious candidate'' we are appealing somewhat to physical intuition and generalization. In Euclidean space, free particles follow straight paths, so the arclength or pathlength variation is an extremum, and we expect the same behavior in Minkowski spacetime. Also, the arclength does not depend upon the parameterization, and lastly, the mass $m$ merely provides the correct units for `action'.
So let's take
\begin{equation}
\label{eq:relativistic-Lagrangian}
L = m \sqrt{\eta_{ij}\dot{q}^i\dot{q}^j}
\end{equation}
and work out the Euler--Lagrange equations. We have
\begin{align*}
p_i = \frac{\pa L}{\pa\dot{q}^i}
&= m\frac{\pa}{\pa\dot{q}^i}\sqrt{\eta_{kl}\dot{q}^k\dot{q}^l} \\
&= m\frac{2\eta_{ij}\dot{q}^j}{2\sqrt{\eta_{kl}\dot{q}^k\dot{q}^l}} \\
&= m\frac{\eta_{ij}\dot{q}^j}{\sqrt{\eta_{kl}\dot{q}^k\dot{q}^l}}
\, = \frac{m\dot{q}_i}{\norm{\dot{q}}}.
\end{align*}
Now note that this $p_i$ doesn't change when we change the parameter to accomplish $\dot{q}\mapsto \alpha\dot{q}$. The Euler--Lagrange equations say
\[
\dot{p}_i = F_i = \frac{\pa L}{\pa q^i} = 0.
\]
The meaning of this becomes clearer if we use ``proper time'' as our parameter (like parameterizing a curve by its arclength) so that
\[
\int_{t_0}^{t_1}\norm{\dot{q}}dt = t_1-t_0, \quad \forall\,t_0,t_1
\]
which fixes the parametrization up to an additive constant. This implies $\norm{\dot{q}}=1$, so that
\[
p_i = m\frac{\dot{q}_i}{\norm{\dot{q}}} = m\dot{q}_i
\]
and the Euler--Lagrange equations say
\[
\dot{p}_i =0 \Rightarrow m\ddot{q}_i = 0
\]
so our free particle moves unaccelerated along a straight line, as one
would expect.
%%
%%
\subsection{Comments}
This Lagrangian from Eq.\eqref{eq:relativistic-Lagrangian} has lots of
symmetries coming from \emph{reparameterizing the path}, so Noether's
theorem yields lots of conserved quantities for the relativistic free
particle. This is in fact called ``the problem of time'' in general
relativity. Here we see it starting to show up in special relativity.
These reparameterization symmetries work as follows. Consider any (smooth) 1-parameter family of reparameterizations, i.e., diffeomorphisms
\[
f_s \colon \mathbb{R}\longrightarrow\mathbb{R}
\]
with $f_0= 1_\mathbb{R}$. These act on the space of paths $\Gamma = \{q:\mathbb{R}\rightarrow Q\}$ as follows: given any $q\in\Gamma$ we get
\[
q_s(t) = q\bigl(f_s(t)\bigr)
\]
where we should note that $q_s$ is \emph{physically indistinguishable} from $q$. Let's show that
\[
\delta L = \dot{\ell},\quad\text{(when Euler--Lagrange eqns.\ hold)}
\]
so that Noether's theorem gives a conserved quantity
\[
p_i\delta q^i - \ell
\]
Here we go then:
\begin{align*}
\delta L &= \frac{\pa L}{\pa q^i}\delta q^i
+ \frac{\pa L}{\pa\dot{q}^i}\delta\dot{q}^i \\
&= p_i\delta\dot{q}^i \\
&= \frac{m\dot{q}_i}{\norm{\dot{q}}}
\left.\frac{d}{ds}\dot{q}^i\Bigl(f_s(t)\Bigr)\right|_{s=0} \\
&= \frac{m\dot{q}_i}{\norm{\dot{q}}}
\left.\frac{d}{dt}\frac{d}{ds} q^i\Bigl(f_s(t)\Bigr)\right|_{s=0} \\
&= \frac{m\dot{q}_i}{\norm{\dot{q}}}
\left.\frac{d}{dt}\biggl(\dot{q}^i\Bigl(f_s(t)\Bigr)\frac{d f_s(t)}{ds}\biggr)\right|_{s=0} \\
&= \frac{m\dot{q}_i}{\norm{\dot{q}}}
\frac{d}{dt}\left(\dot{q}^i\delta f\right) \\
&= \frac{d}{dt}\left(p_i\dot{q}^i\delta f\right)
\end{align*}
where in the last step we used the Euler--Lagrange equations, i.e.\ $\frac{d}{dt}p_i=0$, so $\delta L=\dot{\ell}$ with $\ell=p_i\dot{q}^i\delta f$.
So to recap a little: we saw the free relativistic particle has
\[
L = m\norm{\dot{q}} = m\sqrt{\eta_{ij}\dot{q}^i\dot{q}^j}
\]
and we've considered reparameterization symmetries
\[
q_s(t) = q\Bigl(f_s(t)\Bigr), \qquad f_s:\mathbb{R}\rightarrow\mathbb{R}
\]
we've used the fact that
\[
\delta q^i = \left.\frac{d}{ds}q^i\Bigl(f_s(t)\Bigr)\right|_{s=0}
= \dot{q}^i\delta f
\]
so (repeating a bit of the above)
\begin{align*}
\delta L &= \frac{\pa L}{\pa q^i}\delta q^i
+ \frac{\pa L}{\pa\dot{q}^i}\delta\dot{q}^i \\
&= p_i\delta\dot{q}^i,\quad\text{(since $\pa L/\pa q^i =0$,
and $\pa L/\pa\dot{q}^i = p_i$)} \\
&= p_i\delta\dot{q}^i \\
&= p_i\frac{d}{dt}\delta q^i \\
&= p_i\frac{d}{dt}\dot{q}^i\delta f \\
&= \frac{d}{dt} p_i\dot{q}^i\delta f,
\quad\text{and set $p_i\dot{q}^i\delta f=\ell$ }
\end{align*}
so Noether's theorem gives a conserved quantity
\begin{align*}
p_i\delta q^i - \ell &= p_i\dot{q}^i\delta f - p_i\dot{q}^i\delta f \\
&= 0
\end{align*}
So these conserved quantities \emph{vanish}! In short, we're seeing an example of what physicists call \define{gauge symmetries}. This is a good topic for starting a new section.
%%
%%
%%
\section{Gauge Symmetries}
\label{sec:RelativisticLagrangians}
%We will continue the story of symmetry and Noether's theorem from the last section with a few more examples. We use principles of least action to conjure up Lagrangians for our systems, realizing that a given system may not have a unique Lagrangian but will often have an obvious natural Lagrangian. Given a Lagrangian we derive equations of motion from the Euler--Lagrange equations. Symmetries of $L$ guide us in finding conserved quantities, in particular Hamiltonians from time translation invariance, via Noether's theorem.
% This section also introduces gauge symmetry, and this is where we begin.
What are gauge symmetries?
\begin{enumerate}
\item These are symmetries that permute different mathematical descriptions of the same physical situation---in this case reparameterizations of a path.\label{i:same-phys}
\item These symmetries make it impossible to compute $q(t)$ given $q(0)$ and $\dot{q}(0)$: since if $q(t)$ is a solution so is $q(f(t))$ for any reparameterization $f:\mathbb{R}\rightarrow\mathbb{R}$. We have a high degree of non-uniqueness of solutions to the Euler--Lagrange equations. \label{i:non-unique_E-L_solns}
\item These symmetries give conserved quantities that work out to equal zero!\label{i:conserved-quant-vanish}
\end{enumerate}
Note that (\ref{i:same-phys}) is a subjective criterion, (\ref{i:non-unique_E-L_solns}) and (\ref{i:conserved-quant-vanish}) are objective, and (\ref{i:conserved-quant-vanish}) is easy to test, so we often use (\ref{i:conserved-quant-vanish}) to distinguish \emph{gauge} symmetries from \emph{physical} symmetries.
%%
%%
\subsection{Relativistic Hamiltonian}
What then is the Hamiltonian for special relativity theory? We're continuing here with the example problem of \S\ref{ssec:RelativisticFreeParticle}. Well, the Hamiltonian comes from Noether's theorem from \emph{time translation} symmetry,
\[
q_s(t) = q(t+s)
\]
and this is an example of a reparametrization (with $\delta f=1$), so we see from the previous results that the Hamiltonian is \emph{zero}!
\[
H = 0.
\]
Explicitly, $H=p_i\delta q^i-\ell$ where under $q(t)\rightarrow q(t+s)$ we have $\delta q^i=\qdot^i\delta f$, and so $\delta L=d\ell/dt$ with $\ell=p_i\qdot^i\delta f=p_i\delta q^i$. The result $H=0$ follows.
Now you know why people talk about ``the problem of time'' in general relativity theory, its glimmerings are seen in the flat Minkowski spacetime of special relativity. You may think it's nice and simple to have $H=0$, but in fact it means that there is no temporal evolution possible! So we can't establish a dynamical theory on this footing! That's bad news. (Because it means you might have to solve the static equations for the 4D universe as a whole, and that's impossible!)
But there is another conserved quantity deserving the title of ``energy'' which is \emph{not} zero, and it comes from the symmetry,
\[
q_s(t) = q(t) + s \,w
\]
where $w\in\mathbb{R}^{n+1}$ and $w$ points in some timelike direction.
\begin{figure}[ht]
\centering
\psfrag{w}{$w$}\psfrag{q}{$q$}\psfrag{qs}{$q_s$}
\includegraphics[width=.2\textwidth]{images/timelike-translation}
%\caption{A timelike translation.}
%\label{fig:timelike-translation}
\end{figure}
In fact \emph{any} vector $w$ gives a conserved quantity,
\begin{align*}
\delta L &= \frac{\pa L}{\pa q^i}\delta q^i
+ \frac{\pa L}{\pa\dot{q}^i}\delta\dot{q}^i \\
&= p_i\delta\dot{q}^i,
\quad\text{(since $\pa L/\pa q^i=0$ and $\pa L/\pa\dot{q}^i=p_i$)} \\
&= p_i 0 \, = 0
\end{align*}
since $\delta q^i = w^i$, $\delta\dot{q}^i=\dot{w}^i=0$. This is our $\dot{\ell}$ from Noether's theorem with $\ell=0$, so Noether's theorem says that we get a conserved quantity
\[
p_i\delta q^i - \ell = p_i w^i
\]
namely, the \define{momentum in the $w$ direction}. We know $\dot{p}=0$
from the Euler--Lagrange equations, for our free particle, but here we
see it coming from spacetime translation symmetry:
\begin{align*}
p = &(p_0,p_1,\ldots,p_n) \\
& p_0\text{ is energy},\quad (p_1,\ldots,p_n)\text{ is spatial momentum}.
\end{align*}
We've just about exhausted all the basic stuff that we can learn from the free particle. So next we'll add some external force via an electromagnetic field.
%%
%%
%%
\section{Relativistic Particle in an Electromagnetic Field}
\label{sec:RelativisticParticle_EM-field}
The electromagnetic field is described by a 1-form $A$ on spacetime, $A$ is the \define{vector potential}, such that
\begin{equation}
dA = F
\end{equation}
is a 2-form containing the electric and magnetic fields,
\begin{equation}
F_{ij} = \frac{\pa A_j}{\pa x^i} - \frac{\pa A_i}{\pa x^j}
\end{equation}
We can write (for $Q$ having local charts to $\mathbb{R}^{n+1}$),
\begin{align*}
A &= A_0dx^0 + A_1dx^1 +\cdots + A_ndx^n \\
\intertext{and then because $d^2 =0$}
dA &= dA_0\wedge dx^0 + dA_1\wedge dx^1 +\cdots + dA_n\wedge dx^n
\end{align*}
and since the $A_j$ are just functions,
\[
dA_j = \pa_i A_jdx^i
\]
using the summation convention and $\pa_i := \pa/\pa x^i$. The reader can easily check that the components of $F =F_{01}dx^0\wedge dx^1 + F_{02}dx^0\wedge dx^2 + \ldots$ agree with the matrix expression below (at least in 4 dimensions).
So, for example, in 4-dimensional spacetime
\[
F = \left(\begin{array}{cccc}
0 & E_1 & E_2 & E_3 \\
-E_1 & 0 & B_3 & -B_2 \\
-E_2 & -B_3 & 0 & B_1 \\
-E_3 & B_2 & -B_1 & 0 \\
\end{array}\right)
\]
where $E$ is the electric field and $B$ is the magnetic field. The action for a particle of charge $e$ is
\[
S = m\int_{t_0}^{t_1}\norm{\dot{q}}\,dt + e \int_{q} A
\]
here
\begin{align*}
&\int_{t_0}^{t_1}\norm{\dot{q}}\,dt = \text{ proper time}, \\
&\int_{q} A = \text{ integral of $A$ along the path $q$}.
\end{align*}
Note that since $A$ is a 1-form we can integrate it over an oriented manifold,
but one can also write the path integral using time $t$ as a parameter, with
$A_i\qdot^i\,dt$ the differential, after $dq^i=\qdot^idt$.
% \vspace{2ex}\noindent (Week 4, April 18, 20, 22.)\vspace{2ex}
The Lagrangian in the above action, for a charge $e$ with mass $m$ in an electromagnetic potential $A$ is
\begin{equation}
\label{eq:Lagrangian-relativistic-EM}
L(q,\dot{q}) = m\norm{\dot{q}} + eA_i\dot{q}^i
\end{equation}
so we can work out the Euler--Lagrange equations:
\begin{align*}
p_i = \frac{\pa L}{\pa\dot{q}^i} &= m\frac{\dot{q}_i}{\norm{\dot{q}}} + eA_i\\
&= m v_i + e \,A_i
\end{align*}
where $v\in\mathbb{R}^{n+1}$ is the velocity, normalized so that $\norm{v}=1$. Note that \emph{now momentum is no longer mass times velocity}! That's because we're in $n+1$-d spacetime, the momentum is an $n+1$-vector. Continuing the analysis, we find the force
\begin{align*}
F_i = \frac{\pa L}{\pa q^i} &= \frac{\pa}{\pa q^i}\Bigl(e\,A_j\dot{q}^j\Bigr)\\
&= e\frac{\pa A_j}{\pa q^i} \dot{q}^j
\end{align*}
So the Euler--Lagrange equations say (noting that $A_i=A_i\bigl(q(t)\bigr)$):
\begin{align*}
\dot{p} &= F \\
\frac{d}{dt}\Bigl(mv_i+eA_i\Bigr) &= e\frac{\pa A_j}{\pa q^i}\dot{q}^j\\
m\frac{d v_i}{dt} &= e\frac{\pa A_j}{\pa q^i}\dot{q}^j - e\frac{d A_i}{dt}\\
m\frac{d v_i}{dt} &= e\frac{\pa A_j}{\pa q^i}\dot{q}^j
- e\frac{\pa A_i}{\pa q^j}\dot{q}^j\\
&= e\left(\frac{\pa A_j}{\pa q^i} - \frac{\pa A_i}{\pa q^j}\right)\dot{q}^j
\end{align*}
the term in parentheses is $\fvect{F}_{ij}=$~the electromagnetic field, $F=dA$. So we get the following equations of motion
% \begin{equation}
% \boxed{
% \raisebox{15pt}[20pt][0pt]{} \gapleft
% m\frac{d v_i}{dt} = e\fvect{F}_{ij}\dot{q}^j, \quad\text{(Lorentz force law)}
% \gapright
% \raisebox{-15pt}[0pt][15pt]{}
% }
% \end{equation}
\begin{equation}
\boxed{
\eqngapabove \gapleft
m\frac{d v_i}{dt} = e\fvect{F}_{ij}\dot{q}^j, \quad\text{(Lorentz force law)}
\gapright \eqngapbelow
}
\end{equation}
\noindent (Usually called the ``Lorentz'' force law.)
%%
%%
%%
\section{Lagrangian for a String}
So we've looked at a point particle and tried
\[
S = m\cdot(\text{arclength}) + \int A
\]
or with `proper time' instead of `arclength', where the 1-form $A$ can be integrated over a 1-dimensional path. A generalization (or specialization, depending on how you look at it) would be to consider a Lagrangian for an extended object.
In string theory we boost the dimension by +1 and consider a string tracing out a 2D surface as time passes (Fig.~\ref{fig:string-worldtube}).
\begin{figure}[ht]
\centering
%\psfrag{becomes}{becomes}
\includegraphics[width=.25\textwidth]{images/string-worldtube}
\caption{Worldtube of a closed string.}
\label{fig:string-worldtube}
\end{figure}
Can you infer an appropriate action for this system? Remember, the physical or physico-philosophical principle we've been using is that the path followed by physical objects minimizes the ``activity'' or ``aliveness'' of the system. Given that we presumably cannot tamper with the length of the closed string, then the worldtube quantity analogous to arclength or proper time would be the area of the worldtube (or worldsheet for an open string). If the string is also assumed to be a source of electromagnetic field then we need a 2-form to integrate over the 2D worldtube analogous to the 1-form integrated over the pathline of the point particle. In string theory this is usually the ``Kalb-Ramond field'', call it $B$. To recover electrodynamic interactions it should be antisymmetric like $A$, but its tensor components will have two indices since it's a 2-form. The string action can then be written
\begin{equation}
\label{eq:string-action}
S = \alpha\cdot(\text{area}) + e \int B
\end{equation}
We've also replaced the point particle mass by the string tension $\alpha$ [mass$\cdot$length$^{-1}$] to obtain the correct units for the action (since replacing arclength by area meant we had to compensate for the extra length dimension in the first term of the above string action).
This may still seem like we've pulled a rabbit out of a hat. But we haven't checked that this action yields sensible dynamics yet! But supposing it does, then would it justify our guesswork and intuition in arriving at Eq.\eqref{eq:string-action}? Well by now you've probably realized that one can have more than one form of action or Lagrangian that yields the same dynamics. So provided we supply reasonable physically realistic heuristics then whatever Lagrangian or action that we come up with will stand a good chance of describing some system with a healthy measure of physical verisimilitude.
That's enough about string for now. The point was to illustrate the type of reasoning that one can use in conjuring up a Lagrangian. It's particularly useful when Newtonian theory cannot give us a head start, i.e., in relativistic dynamics and in the physics of extended particles.
%%
%%
\section{Another Lagrangian for Relativistic Electrodynamics}
In \S~\ref{sec:RelativisticParticle_EM-field},
Eq.\eqref{eq:Lagrangian-relativistic-EM} we saw an example of a
Lagrangian for relativistic electrodynamics that had awkward
reparametrization symmetries, meaning that $H=0$ and there were
non-unique solutions to the Euler--Lagrange equations arising from
applying gauge transformations. This freedom to change the gauge can
be avoided.
Recall Eq.\eqref{eq:Lagrangian-relativistic-EM}, which was a Lagrangian for a charged particle with reparametrization symmetry
\[
L = m\norm{\dot{q}} + eA_i\dot{q}^i
\]
just as for an uncharged relativistic particle. But there's another Lagrangian we can use that doesn't have this gauge symmetry:
\begin{equation}
L = \frac{1}{2}m\dot{q}\vdot\dot{q} + eA_i\dot{q}^i
\end{equation}
This one even has some nice features.
\begin{itemize}
\item It looks formally like ``$\frac{1}{2}mv^2$'', familiar from nonrelativistic mechanics.
\item There's no ugly square root, so it's everywhere differentiable, and there's no trouble with paths being timelike or spacelike in direction, they are handled the same.
\end{itemize}
What Euler--Lagrange equations does this Lagrangian yield?
\begin{align*}
p_i &= \frac{\pa L}{\pa\dot{q}^i} = m\dot{q}_i + eA_i \\
F_i &= \frac{\pa L}{\pa q^i} = e\frac{\pa A_j}{\pa q^i}\dot{q}^j
\end{align*}
Very similar to before! The Euler--Lagrange equations then say
\begin{align*}
\frac{d}{dt}\Bigl(m\dot{q}_i+eA_i\Bigr) &= e\frac{\pa A_j}{\pa q^i}\dot{q}^j\\
m\ddot{q}_i &= e\fvect{F}_{ij}\dot{q}^j
\end{align*}
almost as before. (I've taken to using $\fvect{F}$ here for the
electromagnetic field tensor to avoid clashing with $F$ for the
generalized force.) The only difference is that we have $m\ddot{q}_i$
instead of $m\dot{v}_i$ where $v_i=\dot{q}_i/\norm{\dot{q}}$. So the
old Euler--Lagrange equations of motion reduce to the new ones if we
pick a parametrization with $\norm{\dot{q}}=1$, which would be a
parametrization by proper time for example.
Let's work out the Hamiltonian for this
\[
L = \frac{1}{2}m\dot{q}\vdot\dot{q} + eA_i\dot{q}^i
\]
for the relativistic charged particle in an electromagnetic field. Recall that for our reparametrization-invariant Lagrangian
\[
L = m\sqrt{\dot{q}_i\dot{q}^i} + eA_i\dot{q}^i
\]
we got $H=0$, time translation was a gauge symmetry. With the new Lagrangian it's not! Indeed
\[
H = p_i\dot{q}^i -L
\]
and now
\begin{align*}
p_i &= \frac{\pa L}{\pa\dot{q}^i} = m\dot{q}_i + e A_i \\
\intertext{so}
H &= (m\dot{q}_i + eA_i)\dot{q}^i - (\tfrac{1}{2}m\dot{q}_i\dot{q}^i + eA_i\dot{q}^i) \\
&= \tfrac{1}{2}m\dot{q}_i\dot{q}^i
\end{align*}
%%
\paragraph{Comments.} This is vaguely like how a nonrelativistic particle in a potential $V$ has
\[
H=p_i\dot{q}^i-L=2K-(K-V)=K+V,
\]
but now the ``potential'' $V=-eA_i\dot{q}^i$ is \emph{linear in velocity}, so now
\[
H=p_i\dot{q}^i-L=(2K-V)-(K-V)=K.
\]
As claimed $H$ is not zero, and the fact that it's conserved says $\norm{\dot{q}(t)}$ is constant as a function of $t$, so the particle's path is parameterized by proper time up to rescaling of $t$. That is, we're getting ``conservation of speed'' rather than some more familiar ``conservation of energy''. The reason is that this Hamiltonian comes from the symmetry
\[
q_s(t) = q(t+s)
\]
instead of spacetime translation symmetry
\[
q_s(t) = q(t) + s\,w,\quad w\in\mathbb{R}^{n+1}
\]
the difference is illustrated schematically in Fig.~\ref{fig:propertime-rescale-vs-translation}.
\begin{figure}[ht]
\centering
\psfrag{qs=q(t+s)}{$q_s(t)=q(t+s)$}
\psfrag{qs=q+sw}{$q_s(t)=q(t)+sw$}
\psfrag{w}{$w$}
\includegraphics[width=.45\textwidth]{images/propertime-rescale-vs-translation}
\caption{Proper time rescaling vs spacetime translation.}
\label{fig:propertime-rescale-vs-translation}
\end{figure}
Our Lagrangian
\[
L(q,\dot{q}) = \tfrac{1}{2}m\norm{\dot{q}}^2 + eA_i(q)\dot{q}^i
\]
has time translation symmetry iff $A$ is translation invariant (but it's highly unlikely a given system of interest will have $A(q)=A(q+sw)$). In general then there's no conserved ``energy'' for our particle corresponding to translations in time.
%%
%%
%%
\section{The Free Particle in General Relativity}
\label{sec:GR-particle}
In general relativity, spacetime is an $(n+1)$-dimensional Lorentzian
manifold, namely a smooth $(n+1)$-dimensional manifold $Q$
with a \emph{Lorentzian metric} $g$. We define the metric as follows.
\begin{enumerate}
\item For each $x\in Q$, we have a bilinear map
\begin{align*}
g(x):T_x Q\times T_x Q &\longrightarrow\mathbb{R} \\
(v,w) &\longmapsto g(x)(v,w)
\end{align*}
or we could write $g(v,w)$ for short.
\item With respect to some basis of $T_x Q$ we have
\begin{align*}
g(v,w) &= g_{ij}v^iw^j \\
g_{ij} &=\left(\begin{array}{cccc}
1 & 0 & \ldots & 0 \\
0 & -1 & & 0 \\
\vdots & & \ddots & \vdots \\
0 & 0 & \ldots & -1
\end{array}\right)
\end{align*}
Of course we can write $g(v,w)=g_{ij}v^iw^j$ in any basis, but for different bases $g_{ij}$ will have a different form.
\item $g(x)$ varies smoothly with $x$.
\end{enumerate}
The Lagrangian for a free point particle in the spacetime $Q$ is
\begin{align*}
L(q,\dot{q}) &= m\sqrt{g(q)(\dot{q},\dot{q})} \\
&= m\sqrt{g_{ij}\dot{q}^i\dot{q}^j}
\end{align*}
just like in special relativity but with $\eta_{ij}$ replaced by $g_{ij}$. Alternatively we could just as well use
\begin{align*}
L(q,\dot{q}) &= \tfrac{1}{2}m g(q)(\dot{q},\dot{q}) \\
&= \tfrac{1}{2}m g_{ij}\dot{q}^i\dot{q}^j
\end{align*}
The big difference between these Lagrangians and their special-relativistic counterparts is that now spacetime translation symmetry (and rotation, and boost symmetry) is gone! So there is no conserved energy-momentum (nor angular momentum, nor velocity of center of energy) anymore!
Let's find the equations of motion. Suppose then $Q$ is a Lorentzian manifold with metric $g$ and $L:TQ\rightarrow\mathbb{R}$ is the Lagrangian of a free particle,
\[
L(q,\dot{q}) = \tfrac{1}{2}mg_{ij}\dot{q}^i\dot{q}^j
\]
We find equations of motion from the Euler--Lagrange equations, which
in this case start from
\[
p_i = \frac{\pa L}{\pa\dot{q}^i} = mg_{ij}\dot{q}^j
\]
The velocity $\dot{q}$ here is a tangent vector, the momentum $p$ is a cotangent vector, and we need the metric to relate them, via
\begin{align*}
g:T_q Q\times T_q Q&\longrightarrow\mathbb{R}\\
(v,w)&\longmapsto g(v,w)
\end{align*}
which gives
\begin{align*}
T_q Q &\longrightarrow T^\ast_q Q \\
v &\longmapsto g(v,-).
\end{align*}
In coordinates this would say that the tangent vector $v^i$ gets mapped to the cotangent vector $g_{ij}v^j$. This is lurking behind the passage from $\dot{q}^i$ to the momentum $mg_{ij}\dot{q}^j$.
Getting back to the Euler--Lagrange equations,
\begin{align*}
p_i = \frac{\pa L}{\pa\dot{q}^i} &= mg_{ij}\dot{q}^j \\
F_i = \frac{\pa L}{\pa q^i}
&= \frac{\pa}{\pa q^i}\Bigl(\tfrac{1}{2}mg_{jk}(q)\dot{q}^j\dot{q}^k\Bigr)\\
&=\tfrac{1}{2}m\pa_{i}g_{jk}\dot{q}^j\dot{q}^k,
\quad(\text{where } \pa_i=\frac{\pa}{\pa q^i}).
\end{align*}
So the Euler--Lagrange equations say
\[
\frac{d}{dt}mg_{ij}\dot{q}^j = \tfrac{1}{2}m\pa_{i}g_{jk}\dot{q}^j\dot{q}^k.
\]
The mass factors away, so the motion is independent of the mass! Essentially we have a geodesic equation.
We can rewrite this \define{geodesic equation} as follows
\begin{align*}
\frac{d}{dt}g_{ij}\dot{q}^j &= \tfrac{1}{2}\pa_{i}g_{jk}\dot{q}^j\dot{q}^k \\
\hspace{-3ex}\therefore\quad
\pa_{k}g_{ij}\dot{q}^k\dot{q}^j + g_{ij}\ddot{q}^j
&= \tfrac{1}{2}\pa_{i}g_{jk}\dot{q}^j\dot{q}^k \\
\hspace{-3ex}\therefore\quad
g_{ij}\ddot{q}^j &= \bigl(\tfrac{1}{2}\pa_{i}g_{jk}-\pa_{k}g_{ij}\bigr)\dot{q}^j\dot{q}^k \\
&= \tfrac{1}{2}\bigl(\pa_{i}g_{jk}-\pa_{k}g_{ij}-\pa_{j}g_{ki}\bigr)\dot{q}^j\dot{q}^k
\end{align*}
where the last line follows by symmetry of the metric, $g_{ik}=g_{ki}$.
Now let,
\[
\Gamma_{ijk} = -\tfrac{1}{2}\bigl(\pa_{i}g_{jk}-\pa_{k}g_{ij}-\pa_{j}g_{ki}\bigr)
\]
the minus sign being just a convention (so that we agree with everyone else). This defines what we call the Christoffel symbols $\Gamma^i_{jk}$. Then
\begin{align*}
\ddot{q}_i = g_{ij}\ddot{q}^j &= -\Gamma_{ijk}\dot{q}^j\dot{q}^k \\
\hspace{-3ex}\therefore\quad
\ddot{q}^i &= -\Gamma^i_{jk}\dot{q}^j\dot{q}^k.
\end{align*}
So we see that $\ddot{q}$ can be computed in terms of $\dot{q}$ and the Christoffel symbols $\Gamma^i_{jk}$, which is really a particular type of connection that a Lorentzian manifold has (the Levi-Civita connection), a ``connection'' is just the rule for parallel transporting tangent vectors around the manifold.
Parallel transport is just the simplest way to compare vectors at different points in the manifold. This allows us to define, among other things, a ``covariant derivative''.
%%
%%
\section{A Charged Particle on a Curved Spacetime}
We can now apply what we've learned in consideration of a charged particle, of charge $e$, in an electromagnetic field with potential $A$, in our Lorentzian manifold. The Lagrangian would be
\[
L = \tfrac{1}{2}m g_{jk}\dot{q}^j\dot{q}^k + e A_i \dot{q}^i
\]
which again was conjured up by replacing the flat space metric $\eta_{ij}$ by the metric for GR $g_{ij}$. Not surprisingly, the Euler--Lagrange equations then yield the following equations of motion,
\[
m\ddot{q}_i = -m\Gamma_{ijk}\dot{q}^j\dot{q}^k + e \fvect{F}_{ij}\dot{q}^j.
\]
If you want to know more about Lagrangians for general relativity we recommend the paper by Peldan~\cite{kn:Peldan94}, and also the ``black book'' of Misner, Thorne \& Wheeler~\cite{kn:MisnerThorne&Wheeler}.
%%
%%
%%
\section{The Principle of Least Action and Geodesics}
\label{sec:PrincipleOfLeastAction}
% \noindent (Week 4, April 18, 20, 22.) \vspace{2ex}
%%
%%
\subsection{Jacobi and Least Time vs Least Action}
We've mentioned that Fermat's principle of least time in optics is analogous to the principle of least action in particle mechanics. This analogy is strange, since in the principle of least action we fix the time interval $q:[0,1]\rightarrow Q$. Also, if one imagines a force on a particle resulting from a potential gradient at an interface as analogous to light refraction then you also get a screw-up in the analogy (Fig.~\ref{fig:leasttime-action-analogy}).
\begin{figure}[ht]
\centering
\psfrag{light}{light} \psfrag{particle}{particle}
\psfrag{faster}{faster} \psfrag{slower}{slower}
\psfrag{V high}{$V$ high} \psfrag{V low}{$V$ low}
\psfrag{n high}{$n$ high} \psfrag{n low}{$n$ low}
\includegraphics[width=.75\textwidth]{images/leasttime-action-analogy}
\caption{Least time versus least action.}
\label{fig:leasttime-action-analogy}
\end{figure}
Nevertheless, Jacobi was able to reinterpret the mechanics of a particle as an optics problem and hence ``unify'' the two minimization principles. First, let's consider light in a medium with a varying index of refraction $n$ (recall $1/n\propto$ speed of light). Suppose it's in $\mathbb{R}^n$ with its usual Euclidean metric. If the light is trying to minimize the \emph{time}, it's trying to minimize the arclength of its path in the metric
\[
g_{ij} = n^2\delta_{ij}
\]
that is, the index of refraction $n:\mathbb{R}^n\rightarrow (0,\infty)$, times the usual Euclidean metric
\[
\delta_{ij} = \left( \begin{array}{ccc}
1 & & \text{\raisebox{-2ex}{\huge{0}}} \\
& \ddots & \\
\text{\huge{0}} & & 1 \\
\end{array} \right)
\]
% \[
% \begin{matrix}
% 1 & & \text{\huge{0}} \\
% & \ddots & \\
% \text{\huge{0}} & & 1
% \end{matrix}
% \]
This is just like the free particle in general relativity (minimizing its proper time) except that now $g_{ij}$ is a Riemannian metric
\begin{align*}
g(v,w) &= g_{ij}v^i w^j \\
\text{where}\quad g(v,v) &\ge 0
\end{align*}
So we'll use the same Lagrangian:
\[
L(q,\dot{q}) = \sqrt{g_{ij}(q)\dot{q}^i\dot{q}^j}
\]
and get the same Euler--Lagrange equations:
\be
\label{eq:E-L_free-GR}
\frac{d^2 q^i}{d t^2} + \Gamma^i_{jk}\dot{q}^j\dot{q}^k = 0
\ee
if $q$ is parameterized by arclength or more generally
\[
\lVert\dot{q}\rVert=\sqrt{g_{ij}(q)\dot{q}^i\dot{q}^j} = \,\text{constant}.
\]
As before the Christoffel symbols $\Gamma$ are built from the derivatives of
the metric $g$.
Now, what Jacobi did is show how the motion of a particle in a potential could be viewed as a special case of this. Consider a particle of mass $m$ in Euclidean $\mathbb{R}^n$ with potential $V:\mathbb{R}^n\rightarrow\mathbb{R}$. It satisfies $F=ma$, i.e.,
\be\label{eq:Newton-2}
m\frac{d^2 q^i}{d t^2} = -\pa_i V
\ee
How did Jacobi see \eqref{eq:Newton-2} as a special case of \eqref{eq:E-L_free-GR}? He considered a particle of energy $E$ and he chose the index of refraction to be
\[
n(q) = \sqrt{\frac{2}{m}\bigl(E-V(q)\bigr)}
\]
which is just the \define{speed} of a particle of energy $E$ when the potential energy is $V(q)$, since
\[
\sqrt{\frac{2}{m}(E-V)} = \sqrt{\frac{2}{m}\frac{1}{2}m\lVert\dot{q}\rVert^2} = \lVert\dot{q}\rVert.
\]
Note: this is precisely backwards compared to optics, where $n(q)$ is proportional to the \emph{reciprocal} of the speed of light!! But let's see that it works.
\begin{align*}
L &= \sqrt{g_{ij}(q)\qdot^i\qdot^j} \\
&= \sqrt{n^2(q)\delta_{ij}\qdot^i\qdot^j} \\
&= \sqrt{2/m (E-V(q))\qdot^2}
\end{align*}
where $\qdot^2 =\qdot \vdot\qdot$ is just the usual Euclidean dot product, $v\vdot w =\delta_{ij}v^iw^j$. We get the Euler--Lagrange equations,
\begin{align*}
p_i = \frac{\pa L}{\pa\qdot^i} &= \sqrt{\frac{2}{m} (E-V)}\,\cdot\frac{\qdot_i}{\norm{\qdot}} \\
F_i = \frac{\pa L}{\pa q^i} &= \pa_i\sqrt{\frac{2}{m} (E-V(q))}\,\cdot\norm{\qdot} \\
&=\frac{1}{2} \frac{-2/m \pa_i V}{\sqrt{2/m (E-V(q))}}\,\cdot\norm{\qdot}
\end{align*}
Then $\pdot=F$ says,
\[
\frac{d}{dt}\sqrt{2/m (E-V(q))}\,\cdot\frac{\qdot_i}{\norm{\qdot}} = -\frac{1}{m}\pa_i V\frac{\norm{\qdot}}{\sqrt{2/m (E-V)}}
\]
Jacobi noticed that this is just $F=ma$, or $m\ddot q_i = -\pa_i V$, that is, provided we reparameterize $q$ so that,
\[
\norm{\qdot} = \sqrt{2/m (E-V(q))}.
\]
Recall that our Lagrangian gives reparameterization invariant Euler--Lagrange equations! This is the unification between least time (from optics) and least action (from mechanics) that we sought.
%%
%%
\subsection{The Ubiquity of Geodesic Motion}
We've seen that many classical systems trace out paths that are geodesics, i.e., paths $q:[t_0,t_1]\rightarrow Q$ that are critical points of
\[
S(q) = \int_{t_0}^{t_1}\sqrt{g_{ij}\qdot^i\qdot^j}\,dt
\]
which is proper time when $(Q,g)$ is a Lorentzian manifold, or arclength when $(Q,g)$ is a Riemannian manifold. We have
\begin{enumerate}
\item The metric at $q\in Q$ is,
\begin{align*}
g(q):T_qQ\times T_qQ &\rightarrow\mathbb{R} \\
(v,w) &\mapsto g(v,w)
\end{align*}
and it is bilinear.
\item w.r.t a basis of $T_qQ$
\[
g(v,w) = \delta_{ij}v^iw^j
\]
\item $g(q)$ varies smoothly with $q\in Q$.
\end{enumerate}
An important distinction to keep in mind is that Lorentzian manifolds represent \emph{spacetimes}, whereas Riemannian manifolds represent what we'd normally consider as just \emph{space}.
We've seen at least three important things.
\begin{itemize}
\item[(1)] In the geometric optics approximation, light in $Q=\mathbb{R}^n$ acts like particles tracing out geodesics in the metric
\[
g_{ij} = n(q)^2\delta_{ij}
\]
where $n:Q\rightarrow(0,\infty)$ is the index of refraction function.
\item[(2)] Jacobi saw that a particle in $Q=\mathbb{R}^n$ in some potential $V:Q\rightarrow\mathbb{R}$ traces out geodesics in the metric
\[
g_{ij} = \frac{2}{m}(E-V)\delta_{ij}
\]
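\emph{if} the particle has energy $E$ (where\footnote{Regions where $V>E$, if they exist, would be classically forbidden.} $V<E$).
\item[(3)] A free particle in general relativity traces out geodesics in a Lorentzian manifold $(Q,g)$.
\end{itemize}
%% NOTE: part of the original text appears to have been lost at this point
%% (the source was truncated between ``$V<E$'' and the figure reference below).
Given a Lagrangian $L \colon TQ\rightarrow\mathbb{R}$, the momentum $p_i = \frac{\pa L}{\pa\qdot^i}$ defines a map $\lambda \colon TQ\rightarrow T^\ast Q$ sending $(q,\qdot)$ to $(q,p)$ (Fig.~\ref{fig:mapping_TQ->TstarQ}).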
\begin{figure}[ht]
\centering
\psfrag{TqQ}[l]{$T_qQ$}
\psfrag{TQ}[l]{$TQ$}
\psfrag{T(q,qdot)TqQ}[l]{$T_{(q,\qdot)}T_qQ$}
\psfrag{q}[c]{$q$} \psfrag{Q}[c]{$Q$}
\includegraphics[width=.5\textwidth]{images/mapping_TQ->TstarQ}
\caption{}
\label{fig:mapping_TQ->TstarQ}
\end{figure}
To understand ``$\frac{\pa L}{\pa\qdot^i}$'' in a coordinate-free way, we treat it as the ``differential of $L$ in the vertical direction''---i.e., the $\qdot^i$ directions. To do this, note that the differential of
\[
\begin{array}{ccl}
\pi \colon TQ&\longrightarrow& Q\\
(q,\qdot)&\longmapsto& q
\end{array}
\]
is a map
\[
d\pi \colon TTQ\longrightarrow TQ.
\]
We define the bundle of \define{vertical vectors} to be
\[ VTQ = \{v \in TTQ : \; d\pi(v) = 0\} \subseteq TTQ .\]
That is, it consists of tangent vectors to $TQ$ that are sent to zero
by $d \pi$. Intuitively, they point `straight up'.
The differential of $L$ at some point $(q,\qdot)\in TQ$ is a map from $TTQ$ to $\mathbb{R}$, so we have
\begin{align*}
(dL)_{(q,\qdot)} \in T^\ast_{(q,\qdot)}TQ\\
\intertext{that is,}
dL_{(q,\qdot)}:T_{(q,\qdot)}TQ\longrightarrow\mathbb{R}.
\end{align*}
We can restrict this to $VTQ\subseteq TTQ$, getting
\[
f:V_{(q,\qdot)}TQ\longrightarrow\mathbb{R}.
\]
%%
%% Figure wrapping note: Needs to be in a new paragraph.
%% In \begin{wrapfigure}{r}[9ex]{.33\textwidth}
%% the first argument is the alignment, the second option
%% is the number of lines wrapped in some units, then the
%% figure width.
%%
\noindent But note
\[
V_{(q,\qdot)}TQ \cong T_{(q,\qdot)}(T_qQ)
\]
\begin{wrapfigure}{r}[9ex]{.33\textwidth}
\psfrag{TqQ}[c]{$T_qQ$}
\psfrag{V(q,qdot)TQ}[l]{$V_{(q,\qdot)}TQ$}
\psfrag{T(q,qdot)TqQ}[l]{$T_{(q,\qdot)}T_qQ$}
\psfrag{q}[c]{$q$} \psfrag{Q}[c]{$Q$}
\includegraphics[width=.33\textwidth]{images/V(q,qdot)}
\end{wrapfigure}
%\begin{figure}
% \centering
% \psfrag{TqQ}[c]{$T_qQ$}
% \psfrag{V(q,qdot)TQ}[l]{$V_{(q,\qdot)}TQ$}
% \psfrag{T(q,qdot)TqQ}[l]{$T_{(q,\qdot)}T_qQ$}
% \psfrag{q}[c]{$q$} \psfrag{Q}[c]{$Q$}
% \includegraphics[width=.45\textwidth]{images/V(q,qdot)}
% \caption{}
% \label{fig:V(q,qdot)}
%\end{figure}
and since $T_q Q$ is a vector space,
\[
T_{(q,\qdot)}T_q Q \cong T_q Q
\]
in a canonical way\footnote{Each tangent space $T_vV$ of a vector space $V$
is isomorphic to $V$ in a canonical way.}. So $f$ gives a linear map
\begin{align*}
p:T_q Q\longrightarrow\mathbb{R} \\
\intertext{that is,}
p\in T^\ast_q Q
\end{align*}
this is the momentum!
% \vspace*{2ex}
% \noindent(Week 6, May 2, 4, 6.)\vspace{2ex}
Given $L \colon TQ\rightarrow \mathbb{R}$, we now know a coordinate-free way of describing the map
\begin{align*}
\lambda \colon TQ&\longrightarrow T^\ast Q\\
(q,\qdot)&\longmapsto (q,p)
\end{align*}
given in local coordinates by
\[
p_i = \frac{\pa L}{\pa \qdot^i}.
\]
We say $L$ is \define{regular} if $\lambda$ is a diffeomorphism from $TQ$ to some open subset $X\subseteq T^\ast Q$. In this case we can describe what our system is doing equally well by specifying position and velocity,
\[
(q,\qdot)\in TQ
\]
or position and momentum
\[
(q,p) = \lambda(q,\qdot)\in X.
\]
We call $X$ the \define{phase space} of the system. In practice often $X=T^\ast Q$, then $L$ is said to be \define{strongly regular}.
%%
%%
%%
\section{Regular and Strongly Regular Lagrangians}
This section discusses some examples of the above theory.
%%
%%
\subsection{Example: A Particle in a Riemannian Manifold with Potential $V(q)$}
A particle in a Riemannian manifold $(Q,g)$ in a potential $V \colon Q\rightarrow\mathbb{R}$ has Lagrangian
\[
L(q,\qdot) = \frac{1}{2}mg_{ij}\qdot^i \qdot^j - V(q)
\]
Here
\[
p_i = \frac{\pa L}{\pa\qdot^i} = mg_{ij}\qdot^j
\]
so
\[
\lambda(q,\qdot) = \bigl(q,mg(\qdot,-)\bigr)
\]
$L$ is strongly regular in this case because
\begin{align*}
T_q Q&\longrightarrow T^\ast_q Q\\
v &\longmapsto g (v,-)
\end{align*}
is 1-1 and onto, i.e., the metric is nondegenerate. Thus $\lambda$ is a diffeomorphism, which in this case extends to all of $T^\ast Q$.
%%
%%
\subsection{Example: General Relativistic Particle in an E-M Potential}
For a general relativistic particle with charge $e$ in an electromagnetic vector potential $A$ the Lagrangian is
\[
L(q,\qdot) = \frac{1}{2}mg_{ij}\qdot^i\qdot^j + eA_i\qdot^i
\]
and thus
\[
p_i = \frac{\pa L}{\pa\qdot^i} = mg_{ij}\qdot^j + eA_i.
\]
This $L$ is still strongly regular, but now each map
\begin{align*}
\lambda\left.\right|_{T_q Q} \colon T_q Q&\longrightarrow T^\ast
_qQ \\
\qdot &\longmapsto m\,g(\qdot,-)+eA(q)
\end{align*}
is \emph{affine} rather than linear\footnote{All linear transformations are affine, but affine transformations also include translations, which are nonlinear. In affine geometry there is no defined origin. In this example the translation is the ``$+eA(q)$'' part.}.
%%
%%
\subsection{Example: Free General Relativistic Particle with Reparameterization Invariance}
The free general relativistic particle with reparameterization invariant Lagrangian has
\[
L(q,\qdot) = m\sqrt{g_{ij}\qdot^i\qdot^j}.
\]
This is terrible from the perspective of regularity---it's not differentiable when $g_{ij}\qdot^i\qdot^j$ vanishes, and undefined when the same is negative. Where it \emph{is} defined
\[
p_i=\frac{\pa L}{\pa\qdot^i} = \frac{mg_{ij}\qdot^j}{\norm{\qdot}}
\]
(where $\qdot$ is timelike), we can ask about regularity. Alas, the map $\lambda$ is not 1-1 where defined since multiplying $\qdot$ by some number has \emph{no effect} on $p$! (This is related to the reparameterization invariance---this always happens with reparameterization-invariant Lagrangians.)
%%
%%
\subsection{Example: A Regular but not Strongly Regular Lagrangian}
Here's a Lagrangian that's regular but not strongly regular. Let $Q=\mathbb{R}$ and
\[
L(q,\qdot)=f(\qdot)
\]
so that
\[
p=\frac{\pa L}{\pa\qdot}=f^\prime(\qdot)
\]
This will be regular but not strongly so if $f' \colon \mathbb{R}\rightarrow\mathbb{R}$ is a diffeomorphism from $\mathbb{R}$ to some proper subset $U\subset\mathbb{R}$. For example, take $f(\qdot)=e^{\qdot}$ so $f' \colon \mathbb{R}\stackrel{\sim}{\rightarrow}(0,\infty)\subset\mathbb{R}$. So
\[
\makebox[12em][0mm]{s}L(q,\qdot) = e^{\qdot}\hspace{5em}
\xy
{\ar (0,0)*{}; (30,0)*{}};
(0,2)*+{};(30,15)*+{}
**\crv{~**\dir{-}(27,2)&(30,15)};
(35,7.5)*[r]{\txt{positive slope}}
%(35,7.5)*[r]{\text{positive slope}}
\endxy
\]
or
\[
\makebox[10em][0mm]{s}L(q,\qdot)=\sqrt{1+\qdot^2}\hspace{5em}
\xy
{\ar (0,0)*{}; (30,0)*{}};
(0,15); (15,0) **\dir{.};
(15,0); (30,15) **\dir{.};
(0,16)*+{};(30,16)*+{}
**\crv{(10,7.5)&(15,5)&(20,7.5)};
(35,7.5)*[r]{\txt{slope between\\$-1$ and 1}}
\endxy
\]
and so forth.
%%
%%
%%
\section{Hamilton's Equations}
Now let's assume $L$ is regular, so
\begin{align*}
\lambda \colon TQ& \longrightarrow X\subseteq T^\ast Q \\
(q,\qdot)&\longmapsto(q,p)
\end{align*}
is a diffeomorphism.
This lets us have the best of both worlds: we can identify $TQ$ with $X$ using $\lambda$. This lets us treat $q^i$, $p_i$, $L$, $H$, etc., all as functions on $X$ (or $TQ$), thus writing
\begin{align*}
\qdot^i &\qquad\text{(function on $TQ$)} \\
\intertext{for the function}
\qdot^i\circ\lambda^{-1} &\qquad\text{(function on $X$)}
\end{align*}
In particular
\[
\pdot_i := \frac{\pa L}{\pa q^i}\qquad\text{(Euler--Lagrange equation)}
\]
which is really a function on $TQ$, will be treated as a function on $X$. Now let's calculate:
\begin{align*}
dL &=\frac{\pa L}{\pa q^i}dq^i +\frac{\pa L}{\pa\qdot^i}d\qdot^i \\
&=\pdot_idq^i+p_id\qdot^i
\end{align*}
while
\begin{align*}
dH &=d(p_i\qdot^i - L)\\
&=\qdot^i dp_i + p_id\qdot^i - dL \\
&=\qdot^idp_i + p_id\qdot^i - (\pdot_idq^i+p_id\qdot^i) \\
&=\qdot^idp_i - \pdot_idq^i
\end{align*}
so
\[
dH = \qdot^idp_i - \pdot_idq^i.
\]
Assume the Lagrangian $L \colon TQ\rightarrow\mathbb{R}$ is regular, so
\begin{align*}
\lambda \colon TQ&\stackrel{\sim}{\longrightarrow}X\subseteq T^\ast Q \\
(q,\qdot)&\longmapsto(q,p)
\end{align*}
is a diffeomorphism. This lets us regard both $L$ and the Hamiltonian $H=p_i\qdot^i-L$ as functions on the phase space $X$, and use $(q^i,\qdot^i)$ as local coordinates on $X$. As we've seen, this gives us
\begin{align*}
dL &=\pdot_idq^i +p_id\qdot^i \\
dH&=\qdot^idp_i - \pdot_idq^i.
\end{align*}
But we can also work out $dH$ directly, this time using local coordinates $(q^i,p_i)$, to get
\[
dH = \frac{\pa H}{\pa p_i}dp_i +\frac{\pa H}{\pa q^i}dq^i.
\]
Since $dp_i$, $dq^i$ form a basis of 1-forms, we conclude:
\[\boxed{
\gapabove\gapleft
\qdot^i = \frac{\pa H}{\pa p_i}, \qquad \pdot_i = -\frac{\pa H}{\pa q^i}
\gapbelow\gapright
}
\]
These are \define{Hamilton's Equations}.
%%
%%
\subsection{Hamilton and Euler--Lagrange}
Though $\qdot^i$ and $\pdot_i$ are just functions on $X$, when the Euler--Lagrange equations hold for some path $q \colon [t_0,t_1]\rightarrow Q$, they will be the time derivatives of $q^i$ and $p_i$. So when the Euler--Lagrange equations hold, Hamilton's equations describe the motion of a point $x(t)=\bigl(q(t),p(t)\bigr)\in X$. In fact, in this context, Hamilton's equations are just the Euler--Lagrange equations in disguise. The equation
\[
\qdot^i = \frac{\pa H}{\pa p_i}
\]
really just lets us recover the velocity $\qdot$ as a function of $q$ and $p$, inverting the formula
\[
p_i = \frac{\pa L}{\pa\qdot^i}
\]
which gave $p$ as a function of $q$ and $\qdot$. So we get a formula for the map
\begin{align*}
\lambda^{-1} \colon X&\longrightarrow TQ\\
(q,p)&\longmapsto(q,\qdot).
\end{align*}
Given this, the other Hamilton equation
\[
\pdot_i = -\frac{\pa H}{\pa q^i}
\]
is secretly the Euler--Lagrange equation
\[
\frac{d}{dt}\frac{\pa L}{\pa\qdot^i} =\frac{\pa L}{\pa q^i},
\]
or $\pdot_i =\frac{\pa L}{\pa q^i}$ for short. These are the same because
\[
\frac{\pa H}{\pa q^i} = \frac{\pa}{\pa q^i}\bigl(p_j\qdot^j-L\bigr) = -\frac{\pa L}{\pa q^i}.
\]
%%
%%
\subsubsection{Example: Particle in a Potential $V(q)$}
For a particle in $Q=\mathbb{R}^n$ in a potential $V \colon \mathbb{R}^n\rightarrow\mathbb{R}$ the system has Lagrangian
\[
L(q,\qdot)=\frac{m}{2}\norm{\qdot}^2-V(q)
\]
which gives
\begin{align*}
p&=m\qdot \\
\qdot&=\frac{p}{m}, \qquad (\text{though really that's } \qdot=\frac{g^{ij}p_j}{m})
\end{align*}
and Hamiltonian
\begin{align*}
H(q,p) = p_i\qdot^i-L&=\frac{1}{m}\norm{p}^2-\biggl(\frac{\norm{p}^2}{2m}-V\biggr) \\
&=\frac{1}{2m}\norm{p}^2+V(q).
\end{align*}
So Hamilton's equations say
\begin{align*}
\qdot^i =\frac{\pa H}{\pa p_i} &\quad\Rightarrow\quad \qdot=\frac{p}{m} \\
\pdot_i =-\frac{\pa H}{\pa q^i} &\quad\Rightarrow\quad \pdot=-\grad V
\end{align*}
The first just recovers $\qdot$ as a function of $p$; the second is $F=ma$.
%%
%%
\subsubsection{Note on Symplectic Structure}
Hamilton's equations push us toward the viewpoint where $p$ and $q$ have equal status as coordinates on the phase space $X$. Soon, we'll drop the requirement that $X\subseteq T^\ast Q$ where $Q$ is a configuration space. $X$ will just be a manifold equipped with enough structure to write down Hamilton's equations starting from any $H \colon X\rightarrow\mathbb{R}$.
The coordinate-free description of this structure is the major 20th century contribution to mechanics: a symplectic structure.
This is important. You might have some particles moving on a manifold like $S^3$, which is not symplectic. So the Hamiltonian mechanics point of view says that the abstract manifold that you are really interested in is something different: it must be a symplectic manifold. That's the phase space $X$. We'll introduce symplectic geometry more completely in later chapters.
%%
%%
\subsection{Hamilton's Equations from the Principle of Least Action}
Before, we obtained the Euler--Lagrange equations by associating an ``action'' $S$ with any $q \colon [t_0,t_1]\rightarrow Q$ and setting $\delta S=0$. Now let's get Hamilton's equations \emph{directly} by assigning an action $S$ to any path $x \colon [t_0,t_1]\rightarrow X$ and setting $\delta S=0$. Note: we don't impose any relation between $p$ and $q$, $\qdot$! The relation will follow from $\delta S=0$.
Let $P$ be the space of paths in the phase space $X$ and define the action
\[
S \colon P\longrightarrow\mathbb{R}
\]
by
\[
S(x) = \int_{t_0}^{t_1}(p_i\qdot^i-H)dt
\]
since $p_i \qdot^i - H$ is another way of writing the Lagrangian. More precisely, write our path $x$ as $x(t)=\bigl(q(t),p(t)\bigr)$ and let
\[
S(x) = \int_{t_0}^{t_1}\left[p_i(t)\frac{d}{dt}q^i(t)-H\bigl(q(t),p(t)\bigr)\right]dt
\]
we write $\frac{d}{dt}q^i$ instead of $\qdot^i$ to emphasize that we mean the time derivative rather than a coordinate in phase space.
Let's show $\delta S=0\Leftrightarrow$Hamilton's equations.
\begin{align*}
\delta S &=\delta\int(p_i\qdot^i-H)dt \\
&= \int\bigl(\delta p_i\qdot^i+p_i\delta\qdot^i-\delta H\bigr)dt \\
\intertext{then integrating by parts,}
&= \int\bigl(\delta p_i\qdot^i-\pdot_i\delta q^i-\delta H\bigr)dt \\
&= \int\Bigl(\delta p_i\qdot^i-\pdot_i\delta q^i
-\frac{\pa H}{\pa q^i}\delta q^i-\frac{\pa H}{\pa p_i}\delta p_i\Bigr)dt \\
&=\int\left(\delta p_i\Bigl(\qdot^i-\frac{\pa H}{\pa p_i}\Bigr)
+\delta q^i\Bigl(-\pdot_i-\frac{\pa H}{\pa q^i}\Bigr)\right)dt
\end{align*}
This vanishes $\forall \delta x=(\delta q,\delta p)$ if and only if Hamilton's equations
\[
\qdot^i =\frac{\pa H}{\pa p_i},\qquad \pdot_i=-\frac{\pa H}{\pa q^i}
\]
hold, just as we hoped.
We've seen two principles of ``least action'':
\begin{enumerate}
\item For paths in configuration space $Q$, $\delta S=0 \Leftrightarrow$Euler--Lagrange equations.\label{it:Q-space-least-action}
\item For paths in phase space $X$, $\delta S=0\Leftrightarrow$Hamilton's equations.\label{it:X-space-least-action}
\end{enumerate}
Additionally, since $X\subseteq T^\ast Q$, we might consider a third version based on paths in position-velocity space $TQ$. But when our Lagrangian is regular we have a diffeomorphism $\lambda \colon TQ\stackrel{\sim}{\rightarrow}X$, so this third principle of least action is just a reformulation of principle~\ref{it:X-space-least-action}. However, the \emph{really} interesting principle of least action involves paths in the \define{extended phase space} where we have an additional coordinate for time: $X\times\mathbb{R}$.
Recall the action
\begin{align*}
S(x) &=\int(p_i\qdot^i - H)\,dt \\
&= \int p_i\frac{dq^i}{dt}dt - H\,dt \\
&= \int p_idq^i - H\,dt
\end{align*}
We can interpret the integrand as a 1-form
\[
\beta = p_idq^i - H\,dt
\]
on $X\times\mathbb{R}$, which has coordinates $\{p_i,q^i,t\}$. So any path
\[
x \colon [t_0,t_1]\longrightarrow X
\]
gives a path
\begin{align*}
\sigma \colon [t_0,t_1]&\longrightarrow X\times\mathbb{R} \\
t &\longmapsto (x(t),t)
\end{align*}
and the action becomes the integral of a 1-form over a curve:
\[
S(x) = \int p_i\,dq^i - H\,dt = \int_{\sigma} \beta
\]
%%
%%
%%
\section{Waves versus Particles---The Hamilton-Jacobi Equations}
\label{sec:Hamilton_Jacobi_Eqns}
%\vspace*{2ex}
%\noindent(Week 7, May 9, 11, 13.)\vspace{2ex}
In quantum mechanics we discover that every particle---electrons,
photons, neutrinos, etc.---is a wave, and vice versa. Interestingly
Newton already had a particle theory of light (his ``corpuscles'')
and various physicists argued against it by pointing out that
diffraction is best explained by a wave theory. We've talked about
geometrized optics, an approximation in which light consists of
particles moving along geodesics. Here we start with a Riemannian
manifold $(Q,g)$ as space, but we use the new metric
\[
h_{ij} = n^2g_{ij}
\]
where $n \colon Q\rightarrow(0,\infty)$ is the index of refraction throughout space (generally not a constant).
%%
%%
\subsection{Wave Equations}
Huygens considered this same setup (in simpler language) and considered the motion of a wavefront:
\[\xy
0;<1mm,0mm>:
(0,0),(0,10), { \ellipse(15)ur,^,ul{} },
(0,0),(0,10), { \ellipse(20)ur,^,ul{} },
(0,0),(0,10), { \ellipse(25)ur,^,ul{} },
(12.5,10) \ar @{->} (30,10)
\endxy
\]
and saw that the wavefront is the envelope of a bunch of little wavelets centered at points along the big wavefront:
\[ \xy
0;<1mm,0mm>:
(0,0),(0,0), { \ellipse(25)ur,^,ul{} },
(0,0),(0,0), { \ellipse(25.7)v<1mm,1.2mm>,^,v<-1mm,1.2mm>{}},
(23.4,5.4);(18.4,15.6); **\crv{~*=<8pt>{}~**\cir<4pt>{}(22.3,10.8) },
(23.7,-2.7);(23.7,2.7); **\crv{~*=<8pt>{}~**\cir<4pt>{}(24.3,0) },
(23.4,-5.4);(18.4,-15.6); **\crv{~*=<8pt>{}~**\cir<4pt>{}(22.3,-10.8) },
(23.4,5.4);(18.4,15.6); **\crv{~*=<11pt>{}~**\cir<1pt>{}(22.3,10.8) },
(23.7,-2.7);(23.7,2.7); **\crv{~*=<11pt>{}~**\cir<1pt>{}(24.3,0) },
(23.4,-5.4);(18.4,-15.6); **\crv{~*=<11pt>{}~**\cir<1pt>{}(22.3,-10.8) },
(52,15), *\txt{\small balls of radius $\epsilon$ \\ \small centered at points\\ \small of the old wavefront},
%(35,15),(35,15),\POS(38,18) \ar@{->}@(l,ur) (24,12)*{}
(38,18);(24,12), **\crv{(28,18)} ?>*\dir{>}
\endxy \]
In short, the wavefront moves at unit speed in the normal direction with respect to the ``optical metric'' $h$. We can think about the distance function
\[
d \colon Q\times Q\longrightarrow [0,\infty)
\]
on the Riemannian manifold $(Q,h)$, where
\[
d(q_0,q_1) = \inf_{\Upsilon}(\text{arclength})
\]
where $\Upsilon=\{$paths from $q_0$ to $q_1\}$. (Secretly this $d(q_0,q_1)$ \emph{is} the least action---the infimum of action over all paths from $q_0$ to $q_1$.) Using this we get the wavefronts centered at $q_0\in Q$ as the level sets
\[
\{ q \colon d(q_0,q)=c\}
\]
or at least for small $c>0$, as depicted in Fig.~\ref{fig:wave-levelsets}.
\begin{figure}[ht]
\centering
\psfrag{q0}[c]{$q_0$}
\includegraphics[width=.4\textwidth]{images/wave-levelsets}
\caption{}
\label{fig:wave-levelsets}
\end{figure}
For larger $c$ the level sets can cease to be smooth---we say a \define{catastrophe} occurs---and then the wavefronts are no longer the level sets. This sort of situation can happen for topological reasons (as when the waves smash into
each other in the back of Fig.~\ref{fig:wave-levelsets})
and it can also happen for geometrical reasons
(Fig.~\ref{fig:waves_geom_catas}).
\begin{figure}[ht]
\centering
\includegraphics[width=.4\textwidth]{images/waves_geom_catas}
\caption{}
\label{fig:waves_geom_catas}
\end{figure}
Assuming no such catastrophes occur, we can approximate the waves of light by a wavefunction:
\[
\psi(q)=A(q)e^{ik\,d(q,q_0)}
\]
where $k$ is the wavenumber of the light (i.e., its color) and $A \colon Q\rightarrow\mathbb{R}$ describes the amplitude of the wave, which drops off far from $q_0$. This becomes the \define{eikonal approximation} in optics\footnote{Eikonal comes from the Greek word for `image' or `likeness'; in optics the eikonal approximation is the basis for ray tracing methods.} once we figure out what $A$ should be.
Hamilton and Jacobi focused on distance $d \colon Q\times Q\rightarrow [0,\infty)$ as a function of \emph{two} variables. This is now called \emph{Hamilton's principal function}, and we will denote it as $W$. They noticed that
\[
\frac{\pa}{\pa q_1^i} W(q_0,q_1) = (p_1)_i
\]
\[
\xy
%0;<1mm,0mm>:
(47,-3)*+{q_0},(50,0)*\dir{*};(100,10),
%**\crv{(60,-10)&(80,10)&(95,10)&(100,10)} ?>*\dir{*},
**\crv{(60,-10)&(80,10)&(95,10)&(100,10)},
(103,7)*+{p_1}, (95,10),(97,10) {\ar@{*\dir{*}->} (105,10)}
\POS(91.5,7) \ar @{-} (91.5,13)
\POS(93,7) \ar @{-} (93,13) \POS(94.5,7) \ar @{-} (94.5,13)
\POS(96,7) \ar @{-} (96,13) \POS(97.5,7),
(94.5,5)*+{q_1}
\endxy
\]
where $p_1$ is a cotangent vector \emph{pointing normal to the wavefronts}.
%%
%%
\subsection{The Hamilton-Jacobi Equations}
We've seen that in optics, particles of light move along geodesics, but wavefronts are level sets of the distance functions:
\[ \xy
(0,0);(0,0)*\dir{}="c",
%(0,0);(5,0)*\dir{}="p",
*\xycircle(5,5){\dir{-}},
*\xycircle(10,10){\dir{-}},
*\xycircle(15,15){\dir{-}},
"c"; (13,7.5) **@{-} ?/2pt/*\dir{>}
\endxy \]
at least while the level sets remain smooth. In the eikonal approximation, light is described by waves
\begin{align*}
\psi \colon Q &\longrightarrow \mathbb{C} \\
\psi(q_1) &=A(q_1)e^{ik\,W(q_0,q_1)}
\end{align*}
where $(Q,h)$ is a Riemannian manifold, $h$ is the optical metric, $q_0\in Q$ is the light source, $k$ is the wavenumber and
\[
W \colon Q\times Q\longrightarrow [0,\infty)
\]
is the distance function on $Q$, or \define{Hamilton's principal function}:
\[
W(q_0,q_1) = \inf_{q\in\Upsilon} S(q)
\]
where $\Upsilon$ is the space of paths from $q_0$ to $q_1$ and $S(q)$ is the action of the path $q$, i.e., its arclength. This is begging to be generalized to other Lagrangian systems! (At least it is retrospectively, with the advantage of our historical perspective.) We also saw that
\[
\frac{\pa}{\pa q_1^i}W(q_0,q_1) = (p_1)_i,
\]
\[
\xy
(48,-3)*+{q_0},(50,0)*\dir{*};(90,10),
**\crv{(60,0)&(70,10)&(85,10)&(90,10)},
(90,7)*+{p_1}, (85,10),(85,10) {\ar@{*\dir{*}->} (90,10)}
\POS(79.5,7) \ar @{-} (79.5,13)
\POS(81,7) \ar @{-} (81,13) \POS(82.5,7) \ar @{-} (82.5,13)
\POS(84,7) \ar @{-} (84,13) \POS(85.5,7),
(82.5,5)*+{q_1}
\endxy\]
``points normal to the wavefront''---really the tangent vector
\[
p_1^i = h^{ij}(p_1)_j
\]
points in this direction. In fact $kp_1{}_i$ is the \emph{momentum} of the light passing through $q_1$. This foreshadows quantum mechanics! After all, in
quantum mechanics, the momentum is an operator that acts by
differentiating the wavefunction.
Jacobi generalized this to the motion of point particles in a potential $V \colon Q\rightarrow\mathbb{R}$, using the fact that a particle of energy $E$ traces out geodesics in the metric
\[
h_{ij} = \frac{2(E-V)}{m}\, g_{ij}.
\]
We've seen this reduces point particle mechanics to optics---but only for particles of fixed energy $E$. Hamilton went further, and we now can go further still.
Suppose $Q$ is any manifold and $L \colon TQ\rightarrow\mathbb{R}$ is any function (Lagrangian). Define Hamilton's principal function
\[
W \colon Q\times\mathbb{R}\times Q\times\mathbb{R}\longrightarrow\mathbb{R}
\]
by
\[
W(q_0,t_0; q_1, t_1) = \inf_{q\in\Upsilon} S(q)
\]
where
\[
\Upsilon=\bigl\{ q \colon [t_0,t_1]\rightarrow Q,\, q(t_0)=q_0,\text{ \& }q(t_1)=q_1 \bigr\}
\]
and
\[
S(q) = \int_{t_0}^{t_1} L\Bigl(q(t),\qdot(t)\Bigr)\,dt
\]
Now $W$ is just the \emph{least action} for a path from $(q_0,t_0)$ to $(q_1,t_1)$; it'll be smooth if $(q_0,t_0)$ and $(q_1,t_1)$ are close enough---so let's assume that is true. In fact, we have
\[
\frac{\pa}{\pa q_1^i}W(q_0,t_0;\,q_1,t_1) = (p_1)_i,
\]
\[
\xy
(48,-3)*+{(q_0,t_0)},(50,0)*\dir{*};(90,10),
**\crv{(60,0)&(70,10)&(85,10)&(90,10)},
(90,7)*+{p_1}, (85,10),(85,10) {\ar@{*\dir{*}->} (90,10)}
\POS(79.5,7) \ar @{-} (79.5,13)
\POS(81,7) \ar @{-} (81,13) \POS(82.5,7) \ar @{-} (82.5,13)
\POS(84,7) \ar @{-} (84,13) \POS(85.5,7),
(82.5,5)*+{(q_1,t_1)}
\endxy\]
where $p_1$ is the momentum of the particle going from $q_0$ to $q_1$, at time $t_1$, and
\begin{align*}
\frac{\pa W}{\pa q_0^i}&= -(p_0)_i,\qquad\text{(-momentum at time $t_0$)}\\
\frac{\pa W}{\pa t_1}&= -H_1,\qquad\text{(-energy at time $t_1$)}\\
\frac{\pa W}{\pa t_0}&= H_0,\qquad\text{(+energy at time $t_0$)}
\end{align*}
($H_1=H_0$ as energy is conserved). These last four equations are the \define{Hamilton--Jacobi equations}. The mysterious minus sign in front of energy was seen before in the 1-form,
\[
\beta = p_idq^i - H\,dt
\]
on the extended phase space $X\times\mathbb{R}$. Maybe the best way to get the Hamilton--Jacobi equations is from this extended phase space formulation. But for now let's see how Hamilton's principal function $W$ and variational principles involving least action also yield the Hamilton--Jacobi equations.
Given $(q_0,t_0)$, $(q_1,t_1)$, let
\[
q \colon [t_0,t_1]\longrightarrow Q
\]
be the action-minimizing path from $q_0$ to $q_1$. Then
\[
W(q_0,t_0;\, q_1,t_1) = S(q)
\]
Now consider varying $q_0$ and $q_1$ a bit
\[\xy
(-10,-17)*+{t_0}, (10,-17)*+{t_1}, (-5,-4)*+{q},
(-10,-2);(10,-2), **\crv{(-5,2)&(5,-8)},
(-10,1);(10,2), **\crv{(-5,10)&(5,-5)},
\POS(-10,-15) \ar @{-} (-10,15)
\POS(10,-15) \ar @{-} (10,15)
\endxy\]
and thus vary the action-minimizing path, getting a variation $\delta q$ which does not vanish at $t_0$ and $t_1$. We get
\begin{align*}
\delta W &= \delta S \\
&= \delta\int_{t_0}^{t_1} L(q,\qdot)\,dt \\
&= \int_{t_0}^{t_1}\biggl( \frac{\pa L}{\pa q^i}\delta q^i +
\frac{\pa L}{\pa\qdot^i}\delta\qdot^i \biggr)\,dt \\
&= \int_{t_0}^{t_1}\biggl( \frac{\pa L}{\pa q^i}\delta q^i
- \pdot_i\delta q^i \biggr)\,dt + \biggl.p_i\delta q^i\biggr|_{t_0}^{t_1}\\
&=\int_{t_0}^{t_1}\biggl( \frac{\pa L}{\pa q^i}
- \pdot_i\biggr) \delta q^i \,dt + \biggl.p_i\delta q^i\biggr|_{t_0}^{t_1}
\end{align*}
the term in parentheses is zero because $q$ minimizes the action \emph{and} the Euler--Lagrange equations hold, leaving only the boundary term. So we have
\[
\delta W = p_1{}_i\delta q_1^i - p_0{}_i\delta q_0^i
\]
and so
\[
\frac{\pa W}{\pa q_1^i} = p_{1i},\quad\text{and}\quad\frac{\pa W}{\pa q_0^i}=-p_0{}_i
\]
These are two of the four Hamilton--Jacobi equations! To get the other two, we need to vary $t_0$ and $t_1$:
\[
\hspace{10ex}
\xy
(-10,-17)*\txt{\ssz $t_0$}, (16,-20)*\txt{\ssz $t_1+\Delta t_1$},
(-14,-20)*\txt{\ssz $t_0+\Delta t_0$}, (10,-17)*\txt{\ssz $t_1$},
(-10,-2);(10,-2), **\crv{(-5,2)&(5,-6)} ?>*\dir{*} ?<*\dir{*},
(-14,0);(14,2), **\crv{(-5,10)&(5,-5)} ?>*\dir{*} ?<*\dir{*},
(30,5), *\txt{\small \makebox[10ex][l]{Now change in}\\ \small \makebox[10ex][l]{$W$ will involve $\Delta t_0$}\\ \small \makebox[10ex][l]{and $\Delta t_1$}},
\POS(-14,-18) \ar @{-} (-14,15)
\POS(-10,-15) \ar @{-} (-10,15)
\POS(10,-15) \ar @{-} (10,15)
\POS(14,-18) \ar @{-} (14,15)
\endxy\]
(you can imagine $\Delta t_0<0$ in this figure if you like).
We want to derive the Hamilton--Jacobi equations describing the derivatives of Hamilton's principal function
\[
W(q_0,t_0;\,q_1,t_1) = \inf_{q\in\Upsilon} S(q)
\]
where $\Upsilon$ is the space of paths $q \colon [t_0,t_1]\rightarrow Q$ with $q(t_0)=q_0$, $q(t_1)=q_1$ and
\[
S(q) = \int_{t_0}^{t_1}L(q,\qdot)\,dt
\]
where the Lagrangian $L \colon TQ\rightarrow\mathbb{R}$ will now be assumed \emph{regular}, so that
\begin{align*}
\lambda \colon TQ &\longrightarrow X\subseteq T^\ast Q \\
(q,\qdot) &\longmapsto (q,p)
\end{align*}
is a diffeomorphism. We need to ensure that $(q_0,t_0)$ is close enough to $(q_1,t_1)$ that there is a unique $q\in\Upsilon$ that minimizes the action $S$, and assume that this $q$ depends smoothly on $u=(q_0,t_0; q_1,t_1)\in(Q\times\mathbb{R})^2$. We'll think of $q$ as a function of $u$:
\[
\hspace{15ex}
\xy
(-19,-1)*\txt{\ssz\makebox[4ex][r]{$(t_0,q_0)$}},
(19,5)*\txt{\ssz \makebox[4ex][l]{$(t_1, q_1)$}}, (0,5)*{q},
(-15,0);(15,5), **\crv{(-10,5)&(-5,-7)&(7,10)} ?>*\dir{*} ?<*\dir{*},
(50,5), *\txt{\small \makebox[25ex][c]{$(Q\times\mathbb{R})^2\rightarrow\Upsilon$} \\
\small \makebox[25ex][c]{$u\mapsto q$} \\
\small \makebox[25ex][l]{defined only when $(q_0,t_0)$ and} \\
\small \makebox[25ex][l]{$(q_1,t_1)$ are sufficiently close.} \\ },
\POS(-15,-18) \ar @{-} (-15,18)
\POS(15,-18) \ar @{-} (15,18)
\endxy\]
Then Hamilton's principal function is
\begin{align*}
W(u) \defeq W(q_0,t_0;q_1,t_1) &= S(q) \\
&= \int_{t_0}^{t_1} L(q,\qdot)\,dt \\
&= \int_{t_0}^{t_1} \Bigl(p\qdot-H(q,p)\Bigr)\,dt \\
&= \int_{t_0}^{t_1} p\,dq-H\,dt \\
&= \int_{C} \beta
\end{align*}
where $\beta=pdq-H(q,p)dt$ is a 1-form on the extended phase space $X\times\mathbb{R}$, and $C$ is a curve in the extended phase space:
\[
C(t)=\Bigl(q(t),p(t),t\Bigr)\in X \times \mathbb{R}.
\]
Note that $C$ depends on the curve $q\in\Upsilon$, which in turn depends upon $u=(q_0,t_0;q_1,t_1)\in (Q\times\mathbb{R})^2$. We are after the derivatives of $W$ that appear in the Hamilton--Jacobi relations, so let's differentiate
\[
W(u)=\int_{C}\beta
\]
with respect to $u$ and get the Hamilton--Jacobi equations from $\beta$. Let $u_s$ be a 1-parameter family of points in $(Q\times\mathbb{R})^2$ and work out
\[
\frac{d}{ds}W(u_s) = \frac{d}{ds}\int_{C_s}\beta
\]
where $C_s$ depends on $u_s$ as above
\[
\xy
(-18,-10)*{}="a1",(-22,2)*{}="a2",
(26,-7)*{}="b1",(20,5)*{}="b2",
% Top path:
"a2";"b2"
**\crv{(-7,15)&(7,-5)} \POS?*^+!D{\txt{\small$C_s$}}
?*\dir{>} ?>*\dir{*} ?<*\dir{*},
% Bottom path:
"a1";"b1"
**\crv{(-7,-5)&(7,-20)} \POS?/2pt/*_+!U{\txt{\small$C_0$}}
?/2pt/*\dir{>} ?>*\dir{*} ?<*\dir{*},
(45,0) *\txt{\makebox[4ex][l]{$X\times\mathbb{R}$}},
\POS"a1" \ar @{-} @/_0.7ex/ ^{A_s} "a2" \POS?/4pt/*\dir{>}
\POS"b1" \ar @{-} @/^.5ex/ _{B_s} "b2" \POS?*\dir{>}
% Middle curves: upper, mid, lower
\POS(-20,0);(21,1),**\crv{~*=<4pt>{.}(-7,10)&(10,-8)}
\POS(-18,-3);(22,-2),**\crv{~*=<4pt>{.}(-6,4)&(10,-12)}
\POS(-17,-7);(23,-5),**\crv{~*=<4pt>{.}(-5,-1)&(8,-16)}
\POS(-30,-25) \ar @{-} (-30,20) \ar @{-} (35,-25)
\POS(35,20) \ar @{-} (-30,20) \ar @{-} (35,-25)
\endxy\]
Let's compare
\[
\int_{C_s}\beta \quad\text{and}\quad \int_{A_s+C_s-B_s}\beta=\int_{A_s}\beta+\int_{C_s}\beta-\int_{B_s}\beta
\]
where $-B_s$ denotes the curve $B_s$ traversed backwards. Since $C_0$ minimizes the action among paths with the given end-points, and the curve $A_s+C_s-B_s$ has the same end-points, we get
\[
\frac{d}{ds}\int_{A_s+C_s-B_s}\beta = 0
\]
(although $A_s+C_s-B_s$ is not smooth, we can approximate it by a path that is smooth). So
\[
\frac{d}{ds}\int_{C_s}\beta = \frac{d}{ds}\int_{B_s}\beta - \frac{d}{ds}\int_{A_s}\beta \quad\text{at $s=0$}.
\]
Note
\begin{align*}
\frac{d}{ds}\int_{A_s}\beta &= \frac{d}{ds}\int_0^s\beta(A'_r)\,dr \\
&=\beta(A'_0)
\end{align*}
where $A'_0=v$ is the tangent vector of $A_s$ at $s=0$. Similarly,
\[
\frac{d}{ds}\int_{B_s}\beta = \beta(w)
\]
where $w=B'_0$. So,
\[
\frac{d}{ds}W(u_s) = \beta(w)-\beta(v)
\]
where $w$ keeps track of the change of $(q_1,p_1,t_1)$ as we move $C_s$ and $v$ keeps track of $(q_0,p_0,t_0)$. Now since $\beta=p_i\,dq^i-H\,dt$, we get
\begin{align*}
\frac{\pa W}{\pa q_1^i} &= p_{1i} \\
\frac{\pa W}{\pa t_1} &= -H
\end{align*}
and similarly
\begin{align*}
\frac{\pa W}{\pa q_0^i} &= -p_{0i} \\
\frac{\pa W}{\pa t_0} &= H
\end{align*}
So, if we define a wavefunction:
\[
\psi(q_0,t_0;q_1,t_1)=e^{iW(q_0,t_0;q_1,t_1)/\hbar}
\]
then we get
\begin{align*}
\frac{\pa \psi}{\pa t_1} &= -\frac{i}{\hbar}H_1\psi \\
\frac{\pa \psi}{\pa q_1^i} &= \frac{i}{\hbar}p_{1i}\psi
\end{align*}
At the time of Hamilton and Jacobi's research this would have been new... but nowadays it is thoroughly familiar from \emph{quantum mechanics}!
\bibliographystyle{alpha}
\bibliography{class-mech}
%\input{app1}
\end{document}
**