library.bib


@BOOK{Hyndman2018-ok,
  title     = "Forecasting: principles and practice",
  author    = "Hyndman, Rob J and Athanasopoulos, George",
  abstract  = "Forecasting is required in many situations. Stocking an
               inventory may require forecasts of demand months in advance.
               Telecommunication routing requires traffic forecasts a few
               minutes ahead. Whatever the circumstances or time horizons
               involved, forecasting is an important aid in effective and
               efficient planning. This textbook provides a comprehensive
               introduction to forecasting methods and presents enough
               information about each method for readers to use them sensibly.",
  publisher = "OTexts",
  month     =  may,
  year      =  2018,
  url       = "https://OTexts.com/fpp2",
}

@ARTICLE{Hong2019-ie,
  title     = "Global energy forecasting competition 2017: Hierarchical
               probabilistic load forecasting",
  author    = "Hong, Tao and Xie, Jingrui and Black, Jonathan",
  abstract  = "The Global Energy Forecasting Competition 2017 (GEFCom2017)
               attracted more than 300 students and professionals from over 30
               countries for solving hierarchical probabilistic load
               forecasting problems. Of the series of global energy forecasting
               competitions that have been held, GEFCom2017 is the most
               challenging one to date: the first one to have a qualifying
               match, the first one to use hierarchical data with more than two
               levels, the first one to allow the usage of external data
               sources, the first one to ask for real-time ex-ante forecasts,
               and the longest one. This paper introduces the qualifying and
               final matches of GEFCom2017, summarizes the top-ranked methods,
               publishes the data used in the competition, and presents several
               reflections on the competition series and a vision for future
               energy forecasting competitions.",
  journal   = "International Journal of Forecasting",
  publisher = "Elsevier",
  volume    =  35,
  number    =  4,
  pages     = "1389--1399",
  month     =  oct,
  year      =  2019,
  url       = "http://www.sciencedirect.com/science/article/pii/S016920701930024X",
  keywords  = "Load forecasting; Hierarchical forecasting; Forecasting
               competition; Energy forecasting; Probabilistic forecasting",
  issn      = "0169-2070",
  doi       = "10.1016/j.ijforecast.2019.02.006"
}

@ARTICLE{Diebold2002-ru,
  title     = "Comparing Predictive Accuracy",
  author    = "Diebold, Francis X and Mariano, Robert S",
  abstract  = "We propose and evaluate explicit tests of the null hypothesis of
               no difference in the accuracy of two competing forecasts. In
               contrast to previously developed tests, a wide variety of
               accuracy measures can be used (in particular, the loss
               function need not be quadratic and need not even be symmetric),
               and forecast errors can be non-Gaussian, nonzero mean, serially
               correlated, and contemporaneously correlated. Asymptotic and
               exact finite-sample tests are proposed, evaluated, and
               illustrated.",
  journal   = "Journal of Business \& Economic Statistics",
  publisher = "Taylor \& Francis",
  volume    =  20,
  number    =  1,
  pages     = "134--144",
  month     =  jan,
  year      =  2002,
  url       = "https://doi.org/10.1198/073500102753410444",
  doi       = "10.1198/073500102753410444"
}

@article{Roach2020-ch,
  author   = {Roach, Cameron},
  title    = {Estimating electricity impact profiles for building
              characteristics using smart meter data and mixed models},
  journal  = {Energy and Buildings},
  volume   = {211},
  pages    = {109686},
  month    = mar,
  year     = {2020},
  doi      = {10.1016/j.enbuild.2019.109686},
  issn     = {0378-7788},
  url      = {http://www.sciencedirect.com/science/article/pii/S0378778818335795},
  keywords = {Smart meters; Energy consumption; Mixed effects models;
              Multimodel inference; Office spaces},
  abstract = {Understanding the impact of building characteristics on
              electricity demand is important for policy and management
              decision making. Certain building characteristics and equipment
              may increase or decrease electricity consumption. Due to
              different operating practices, these impacts on electricity
              consumption may vary both across the day and across seasons.
              Quantifying the magnitude and statistical significance of these
              impacts will help managers and policy makers make better informed
              decisions. Here we present a mixed effects model to assess the
              importance of several variables on building electricity
              consumption. We use smart meter and building attribute data for
              129 commercial office buildings. Our building attribute data
              includes information on installed equipment and meter
              characteristics of each building. To account for uncertainty in
              both variable significance and model selection we follow a
              multimodel inference approach. Demand impact profiles that show
              the expected change in electricity demand when a characteristic
              is absent or present are produced for each season. A discussion
              of the commercial office building characteristics we use and
              their impact on the daily profile of electricity demand is
              presented. Our approach has the advantage of only requiring
              building level demand and characteristic data. No equipment level
              sub-metering is required. Furthermore, our approach can also be
              used to quantify changes in electricity consumption caused by
              other factors that do not directly draw electricity from the
              grid, such as management decisions or occupant behaviour. We
              conclude with a discussion of applications for our methodology
              and future research directions.}
}

@ARTICLE{Ugarte2009-hx,
  title    = "Spline smoothing in small area trend estimation and forecasting",
  author   = "Ugarte, M D and Goicoa, T and Militino, A F and Durbán, M",
  abstract = "Semiparametric models combining both non-parametric trends and
              small area random effects are now currently being investigated in
              small area estimation (SAE). These models can prevent bias when
              the functional form of the relationship between the response and
              the covariates is unknown. Furthermore, penalized spline
              regression can be a good tool to incorporate non-parametric
              regression models into the SAE techniques, as it can be
              represented as a mixed effects model. A penalized spline model is
              considered to analyze trends in small areas and to forecast
              future values of the response. The prediction mean squared error
              (MSE) for the fitted and the predicted values, together with
              estimators for those quantities, are derived. The procedure is
              illustrated with real data consisting of average prices per
              squared meter of used dwellings in nine neighborhoods of the city
              of Vitoria, Spain, during the period 1993–2007. Dwelling prices
              for the next five years are also forecast. A simulation study is
              conducted to assess the performance of both the small area trend
              estimator and the prediction MSE estimators. The results confirm
              a good behavior of the proposed estimators in terms of bias and
              variability.",
  journal  = "Computational Statistics \& Data Analysis",
  volume   =  53,
  number   =  10,
  pages    = "3616--3629",
  month    =  aug,
  year     =  2009,
  url      = "http://www.sciencedirect.com/science/article/pii/S0167947309000747"
}

@ARTICLE{Soyer2008-in,
  title     = "Modeling and Analysis of Call Center Arrival Data: A Bayesian
               Approach",
  author    = "Soyer, Refik and Tarimcilar, M Murat",
  abstract  = "In this paper, we present a modulated Poisson process model to
               describe and analyze arrival data to a call center. The
               attractive feature of this model is that it takes into account
               both covariate and time effects on the call volume intensity,
               and in so doing, enables us to assess the effectiveness of
               different advertising strategies along with predicting the
               arrival patterns. A Bayesian analysis of the model is developed
               and an extension of the model is presented to describe potential
               heterogeneity in arrival patterns. The proposed model and the
               methodology are implemented using real call center arrival data.",
  journal   = "Management Science",
  publisher = "INFORMS",
  volume    =  54,
  number    =  2,
  pages     = "266--278",
  month     =  feb,
  year      =  2008,
  url       = "https://doi.org/10.1287/mnsc.1070.0776",
  doi       = "10.1287/mnsc.1070.0776"
}

@ARTICLE{Aldor-Noiman2009-ji,
  title     = "Workload forecasting for a call center: Methodology and a case
               study",
  author    = "Aldor-Noiman, Sivan and Feigin, Paul D and Mandelbaum, Avishai",
  abstract  = "Today's call center managers face multiple operational
               decision-making tasks. One of the most common is determining the
               weekly staffing levels to ensure customer satisfaction and
               meeting their needs while minimizing service costs. An initial
               step for producing the weekly schedule is forecasting the future
               system loads which involves predicting both arrival counts and
               average service times. We introduce an arrival count model which
               is based on a mixed Poisson process approach. The model is
               applied to data from an Israeli Telecom company call center. In
               our model, we also consider the effect of events such as billing
               on the arrival process and we demonstrate how to incorporate
               them as exogenous variables in the model. After obtaining the
               forecasted system load, in large call centers, a manager can
               choose to apply the QED (Quality-Efficiency Driven) regime's
               ``square-root staffing'' rule in order to balance the
               offered-load per server with the quality of service.
               Implementing this staffing rule requires that the forecasted
               values of the arrival counts and average service times maintain
               certain levels of precision. We develop different goodness of
               fit criteria that help determine our model's practical
               performance under the QED regime. These show that during most
               hours of the day the model can reach desired precision levels.",
  journal   = "Annals of Applied Statistics",
  publisher = "Institute of Mathematical Statistics",
  volume    =  3,
  number    =  4,
  pages     = "1403--1447",
  month     =  dec,
  year      =  2009,
  url       = "https://projecteuclid.org/euclid.aoas/1267453946",
  keywords  = "Call centers; QED regime; square-root staffing; forecasting
               arrival count; exogenous variables",
}

@ARTICLE{Ibrahim2016-ch,
  title     = "Modeling and forecasting call center arrivals: A literature
               survey and a case study",
  author    = "Ibrahim, Rouba and Ye, Han and L'Ecuyer, Pierre and Shen,
               Haipeng",
  abstract  = "The effective management of call centers is a challenging task,
               mainly because managers consistently face considerable
               uncertainty. One important source of this uncertainty is the
               call arrival rate, which is typically time-varying, stochastic,
               dependent across time periods and call types, and often affected
               by external events. The accurate modeling and forecasting of
               future call arrival volumes is a complicated issue which is
               critical for making important operational decisions, such as
               staffing and scheduling, in the call center. In this paper, we
               review the existing literature on modeling and forecasting call
               arrivals. We also discuss the key issues for the building of
               good statistical arrival models. In addition, we evaluate the
               forecasting accuracy of selected models in an empirical study
               with real-life call center data. We conclude with a summary of
               possible future research directions in this important field.",
  journal   = "International Journal of Forecasting",
  publisher = "Elsevier",
  volume    =  32,
  number    =  3,
  pages     = "865--874",
  month     =  jul,
  year      =  2016,
  url       = "http://www.sciencedirect.com/science/article/pii/S016920701500151X",
  keywords  = "Call center arrivals; Forecasting; Time series; Doubly
               stochastic Poisson; Fixed-effects; Mixed-effects; ARIMA;
               Exponential smoothing; Bayesian; Dimension reduction;
               Dependence; Seasonality; Marketing events"
}

@ARTICLE{Frees2004-sx,
  title     = "Sales forecasting using longitudinal data models",
  author    = "Frees, Edward W and Miller, Thomas W",
  abstract  = "This paper shows how to forecast using a class of linear mixed
               longitudinal, or panel, data models. Forecasts are derived as
               special cases of best linear unbiased predictors, also known as
               BLUPs, and hence are optimal predictors of future realizations
               of the response. We show that the BLUP forecast arises from
               three components: (1) a predictor based on the conditional mean
               of the response, (2) a component due to time-varying
               coefficients, and (3) a serial correlation correction term. The
               forecasting techniques are applicable in a wide variety of
               settings. This article discusses forecasting in the context of
               marketing and sales. In particular, we consider a data set of
               the Wisconsin State Lottery, in which 40 weeks of sales are
               available for each of 50 postal codes. Using sales data as well
               as economic and demographic characteristics of each postal code,
               we forecast sales for each postal code.",
  journal   = "International Journal of Forecasting",
  publisher = "Elsevier",
  volume    =  20,
  number    =  1,
  pages     = "99--114",
  month     =  jan,
  year      =  2004,
  url       = "http://www.sciencedirect.com/science/article/pii/S0169207003000050",
  keywords  = "Panel data models; Unobserved effects; Random coefficients;
               Heterogeneity"
}

@ARTICLE{Ibrahim2013-oc,
  title     = "Forecasting Call Center Arrivals: {Fixed-Effects},
               {Mixed-Effects}, and Bivariate Models",
  author    = "Ibrahim, Rouba and L'Ecuyer, Pierre",
  abstract  = "We consider different statistical models for the call arrival
               process in telephone call centers. We evaluate the forecasting
               accuracy of those models by describing results from an empirical
               study analyzing real-life call center data. We test forecasting
               accuracy using different lead times, ranging from weeks to hours
               in advance, to mimic real-life challenges faced by call center
               managers. The models considered are (i) a benchmark
               fixed-effects model that does not exploit any dependence
               structures in the data; (ii) a mixed-effects model that takes
               into account both interday (day-to-day) and intraday
               (within-day) correlations; and (iii) two new bivariate
               mixed-effects models, for the joint distribution of the arrival
               counts to two separate queues, that exploit correlations between
               different call types. Our study shows the importance of
               accounting for different correlation structures in the data.",
  journal   = "Manufacturing \& Service Operations Management",
  publisher = "INFORMS",
  volume    =  15,
  number    =  1,
  pages     = "72--85",
  month     =  feb,
  year      =  2013,
  url       = "https://doi.org/10.1287/msom.1120.0405",
  doi       = "10.1287/msom.1120.0405"
}

@ARTICLE{Roach2019-pf,
  title    = "Reconciled boosted models for {GEFCom2017} hierarchical
              probabilistic load forecasting",
  author   = "Roach, Cameron",
  abstract = "When forecasting time series in a hierarchical configuration, it
              is necessary to ensure that the forecasts reconcile at all
              levels. The 2017 Global Energy Forecasting Competition
              (GEFCom2017) focused on addressing this topic. Quantile forecasts
              for eight zones and two aggregated zones in New England were
              required for every hour of a future month. This paper presents a
              new methodology for forecasting quantiles in a hierarchy which
              outperforms a commonly-used benchmark model. A simulation-based
              approach was used to generate demand forecasts. Adjustments were
              made to each of the demand simulations to ensure that all zonal
              forecasts reconciled appropriately, and a weighted reconciliation
              approach was implemented to ensure that the bottom-level zonal
              forecasts summed correctly to the aggregated zonal forecasts. We
              show that reconciling in this manner improves the forecast
              accuracy. A discussion of the results and modelling performances
              is presented, and brief reviews of hierarchical time series
              forecasting and gradient boosting are also included.",
  journal  = "International Journal of Forecasting",
  volume   =  35,
  number   =  4,
  pages    = "1439--1450",
  month    =  oct,
  year     =  2019,
  url      = "http://www.sciencedirect.com/science/article/pii/S0169207018301791",
  issn     = "0169-2070",
  doi      = "10.1016/j.ijforecast.2018.09.009"
}

@ARTICLE{Bell2015-zn,
  title     = "Explaining Fixed Effects: Random Effects Modeling of
               {Time-Series} {Cross-Sectional} and Panel Data",
  author    = "Bell, Andrew and Jones, Kelvyn",
  abstract  = "This article challenges Fixed Effects (FE) modeling as the
               `default' for time-series-cross-sectional and panel data.
               Understanding different within and between effects is crucial
               when choosing modeling strategies. The downside of Random
               Effects (RE) modeling—correlated lower-level covariates and
               higher-level residuals—is omitted-variable bias, solvable with
               Mundlak's (1978a) formulation. Consequently, RE can provide
               everything that FE promises and more, as confirmed by
               Monte-Carlo simulations, which additionally show problems with
               Plümper and Troeger's FE Vector Decomposition method when data
               are unbalanced. As well as incorporating time-invariant
               variables, RE models are readily extendable, with random
               coefficients, cross-level interactions and complex variance
               functions. We argue not simply for technical solutions to
               endogeneity, but for the substantive importance of
               context/heterogeneity, modeled using RE. The implications extend
               beyond political science to all multilevel datasets. However,
               omitted variables could still bias estimated higher-level
               variable effects; as with any model, care is required in
               interpretation.",
  journal   = "Political Science Research and Methods",
  publisher = "Cambridge University Press",
  volume    =  3,
  number    =  1,
  pages     = "133--153",
  month     =  jan,
  year      =  2015,
  url       = "https://www.cambridge.org/core/services/aop-cambridge-core/content/view/0334A27557D15848549120FE8ECD8D63/S2049847014000077a.pdf/div-class-title-explaining-fixed-effects-random-effects-modeling-of-time-series-cross-sectional-and-panel-data-a-href-fn2606-ref-type-fn-a-div.pdf"
}

@ARTICLE{Grajeda2016-vr,
  title    = "Modelling subject-specific childhood growth using linear
              mixed-effect models with cubic regression splines",
  author   = "Grajeda, Laura M and Ivanescu, Andrada and Saito, Mayuko and
              Crainiceanu, Ciprian and Jaganath, Devan and Gilman, Robert H and
              Crabtree, Jean E and Kelleher, Dermott and Cabrera, Lilia and
              Cama, Vitaliano and Checkley, William",
  abstract = "BACKGROUND: Childhood growth is a cornerstone of pediatric
              research. Statistical models need to consider individual
              trajectories to adequately describe growth outcomes.
              Specifically, well-defined longitudinal models are essential to
              characterize both population and subject-specific growth. Linear
              mixed-effect models with cubic regression splines can account for
              the nonlinearity of growth curves and provide reasonable
              estimators of population and subject-specific growth, velocity
              and acceleration. METHODS: We provide a stepwise approach that
              builds from simple to complex models, and account for the
              intrinsic complexity of the data. We start with standard cubic
              splines regression models and build up to a model that includes
              subject-specific random intercepts and slopes and residual
              autocorrelation. We then compared cubic regression splines
              vis-à-vis linear piecewise splines, and with varying number of
              knots and positions. Statistical code is provided to ensure
              reproducibility and improve dissemination of methods. Models are
              applied to longitudinal height measurements in a cohort of 215
              Peruvian children followed from birth until their fourth year of
              life. RESULTS: Unexplained variability, as measured by the
              variance of the regression model, was reduced from 7.34 when
              using ordinary least squares to 0.81 (p < 0.001) when using a
              linear mixed-effect models with random slopes and a first order
              continuous autoregressive error term. There was substantial
              heterogeneity in both the intercept (p < 0.001) and slopes (p <
              0.001) of the individual growth trajectories. We also identified
              important serial correlation within the structure of the data (ρ
              = 0.66; 95 \% CI 0.64 to 0.68; p < 0.001), which we modeled with
              a first order continuous autoregressive error term as evidenced
              by the variogram of the residuals and by a lack of association
              among residuals. The final model provides a parametric linear
              regression equation for both estimation and prediction of
              population- and individual-level growth in height. We show that
              cubic regression splines are superior to linear regression
              splines for the case of a small number of knots in both
              estimation and prediction with the full linear mixed effect model
              (AIC 19,352 vs. 19,598, respectively). While the regression
              parameters are more complex to interpret in the former, we argue
              that inference for any problem depends more on the estimated
              curve or differences in curves rather than the coefficients.
              Moreover, use of cubic regression splines provides biological
              meaningful growth velocity and acceleration curves despite
              increased complexity in coefficient interpretation. CONCLUSIONS:
              Through this stepwise approach, we provide a set of tools to
              model longitudinal childhood data for non-statisticians using
              linear mixed-effect models.",
  journal  = "Emerging Themes in Epidemiology",
  volume   =  13,
  pages    = "1",
  month    =  jan,
  year     =  2016,
  url      = "http://dx.doi.org/10.1186/s12982-015-0038-3",
  keywords = "Body Height; Child development; Growth; Linear Models;
              Longitudinal studies",
  doi      = "10.1186/s12982-015-0038-3"
}

@ARTICLE{Govindarajulu2009-ld,
  title    = "The comparison of alternative smoothing methods for fitting
              non-linear exposure-response relationships with Cox models in a
              simulation study",
  author   = "Govindarajulu, Usha S and Malloy, Elizabeth J and Ganguli,
              Bhaswati and Spiegelman, Donna and Eisen, Ellen A",
  abstract = "We examined the behavior of alternative smoothing methods for
              modeling environmental epidemiology data. Model fit can only be
              examined when the true exposure-response curve is known and so we
              used simulation studies to examine the performance of penalized
              splines (P-splines), restricted cubic splines (RCS), natural
              splines (NS), and fractional polynomials (FP). Survival data were
              generated under six plausible exposure-response scenarios with a
              right skewed exposure distribution, typical of environmental
              exposures. Cox models with each spline or FP were fit to
              simulated datasets. The best models, e.g. degrees of freedom,
              were selected using default criteria for each method. The root
              mean-square error (rMSE) and area difference were computed to
              assess model fit and bias (difference between the observed and
              true curves). The test for linearity was a measure of sensitivity
              and the test of the null was an assessment of statistical power.
              No one method performed best according to all four measures of
              performance, however, all methods performed reasonably well. The
              model fit was best for P-splines for almost all true positive
              scenarios, although fractional polynomials and RCS were least
              biased, on average.",
  journal  = "International Journal of Biostatistics",
  volume   =  5,
  number   =  1,
  pages    = "Article 2",
  month    =  jan,
  year     =  2009,
  url      = "http://dx.doi.org/10.2202/1557-4679.1104",
  doi      = "10.2202/1557-4679.1104"
}

@ARTICLE{Brabec2008-jf,
  title     = "A nonlinear mixed effects model for the prediction of natural
               gas consumption by individual customers",
  author    = "Brabec, Marek and Konár, Ondřej and Pelikán, Emil and Malý,
               Marek",
  abstract  = "This study deals with the description and prediction of
               the daily consumption of natural gas at the level of individual
               customers. Unlike traditional group averaging approaches, we are
               faced with the irregularities of individual consumption series
               posed by inter-individual heterogeneity, including zeros,
               missing data, and abrupt consumption pattern changes. Our model
               is of the nonlinear regression type, with individual
               customer-specific parameters that, nevertheless, have a common
               distribution corresponding to the nonlinear mixed effects model
               framework. It is advantageous to build the model conditionally.
               The first condition, whether a particular customer has consumed
               or not, is modeled as a consumption status in an individual
               fashion. The prediction performance of the proposed model is
               demonstrated using a real dataset of 62 individual customers,
               and compared with two more traditional approaches: ARIMAX and
               ARX.",
  journal   = "International Journal of Forecasting",
  publisher = "Elsevier",
  volume    =  24,
  number    =  4,
  pages     = "659--678",
  month     =  oct,
  year      =  2008,
  url       = "http://www.sciencedirect.com/science/article/pii/S0169207008000976",
  keywords  = "Individual gas consumption; Nonlinear mixed effects model;
               ARIMAX; ARX; Generalized linear mixed model; Conditional
               modeling"
}

@ARTICLE{Durban2005-lk,
  title    = "Simple fitting of subject-specific curves for longitudinal data",
  author   = "Durbán, M and Harezlak, J and Wand, M P and Carroll, R J",
  abstract = "We present a simple semiparametric model for fitting
              subject-specific curves for longitudinal data. Individual curves
              are modelled as penalized splines with random coefficients. This
              model has a mixed model representation, and it is easily
              implemented in standard statistical software. We conduct an
              analysis of the long-term effect of radiation therapy on the
              height of children suffering from acute lymphoblastic leukaemia
              using penalized splines in the framework of semiparametric mixed
              effects models. The analysis revealed significant differences
              between therapies and showed that the growth rate of girls in the
              study cannot be fully explained by the group-average curve and
              that individual curves are necessary to reflect the individual
              response to treatment. We also show how to implement these models
              in S-PLUS and R in the appendix.",
  journal  = "Statistics in Medicine",
  volume   =  24,
  number   =  8,
  pages    = "1153--1167",
  month    =  apr,
  year     =  2005,
  url      = "http://dx.doi.org/10.1002/sim.1991",
  issn     = "0277-6715",
  pmid     = "15568201",
  doi      = "10.1002/sim.1991"
}


@ARTICLE{Ben_Taieb2020-it,
  title     = "Hierarchical Probabilistic Forecasting of Electricity Demand
               with Smart Meter Data",
  author    = "Ben Taieb, Souhaib and Taylor, James W and Hyndman, Rob J",
  abstract  = "Electricity smart meters record consumption, on a near
               real-time basis, at the level of individual commercial and
               residential properties. From this, a hierarchy can be
               constructed consisting of time series of demand at the smart
               meter level, and at various",
  journal   = "Journal of the American Statistical Association",
  publisher = "Taylor \& Francis",
  pages     = "1--36",
  year      =  2020,
  note      = "to appear",
  url       = "https://robjhyndman.com/papers/HPFelectricity.pdf"
}

@book{Seber2012-gu,
  author    = {Seber, George A F and Lee, Alan J},
  title     = {Linear Regression Analysis},
  publisher = {John Wiley \& Sons},
  month     = jan,
  year      = {2012},
  abstract  = {Concise, mathematically clear, and comprehensive treatment of
               the subject. * Expanded coverage of diagnostics and methods of
               model fitting. * Requires no specialized knowledge beyond a good
               grasp of matrix algebra and some acquaintance with straight-line
               regression and simple analysis of variance models. * More than
               200 problems throughout the book plus outline solutions for the
               exercises. * This revision has been extensively class-tested.}
}

@ARTICLE{Ben_Taieb2016-wl,
  title    = "Forecasting Uncertainty in Electricity Smart Meter Data by
              Boosting Additive Quantile Regression",
  author   = "Ben Taieb, Souhaib and Huser, Raphael and Hyndman, Rob J and
              Genton, Marc G",
  abstract = "A large body of the forecasting literature so far has been
              focused on forecasting the conditional mean of future
              observations. However, there is an increasing need for generating
              the entire conditional distribution of future observations in
              order to effectively quantify the uncertainty in time series
              data. We present two different methods for probabilistic time
              series forecasting that allow the inclusion of a possibly large
              set of exogenous variables. One method is based on forecasting
              both the conditional mean and variance of the future distribution
              using a traditional regression approach. The other directly
              computes multiple quantiles of the future distribution using
              quantile regression. We propose an implementation for the two
              methods based on boosted additive models, which enjoy many useful
              properties including accuracy, flexibility, interpretability and
              automatic variable selection. We conduct extensive experiments
              using electricity smart meter data, on both aggregated and
              disaggregated scales, to compare the two forecasting methods for
              the challenging problem of forecasting the distribution of future
              electricity consumption. The empirical results demonstrate that
              the mean and variance forecasting provides better forecasts for
              aggregated demand, while the flexibility of the quantile
              regression approach is more suitable for disaggregated demand.
              These results are particularly useful since more energy data will
              become available at the disaggregated level in the future.",
  journal  = "IEEE Transactions on Smart Grid",
  volume   =  7,
  number   =  5,
  pages    = "2448--2455",
  year     =  2016
}

@ARTICLE{Arora2016-zh,
  title    = "Forecasting electricity smart meter data using conditional kernel
              density estimation",
  author   = "Arora, Siddharth and Taylor, James W",
  abstract = "The recent advent of smart meters has led to large micro-level
              datasets. For the first time, the electricity consumption at
              individual sites is available on a near real-time basis.
              Efficient management of energy resources, electric utilities, and
              transmission grids, can be greatly facilitated by harnessing the
              potential of this data. The aim of this study is to generate
              probability density estimates for consumption recorded by
              individual smart meters. Such estimates can assist decision
              making by helping consumers identify and minimize their excess
              electricity usage, especially during peak times. For suppliers,
              these estimates can be used to devise innovative time-of-use
              pricing strategies aimed at their target consumers. We consider
              methods based on conditional kernel density (CKD) estimation with
              the incorporation of a decay parameter. The methods capture the
              seasonality in consumption, and enable a nonparametric estimation
              of its conditional density. Using 8 months of half-hourly data
              for 1000 meters, we evaluate point and density forecasts, for lead
              times ranging from one half-hour up to a week ahead. We find that
              the kernel-based methods outperform a simple benchmark method
              that does not account for seasonality, and compare well with an
              exponential smoothing method that we use as a sophisticated
              benchmark. To gauge the financial impact, we use density
              estimates of consumption to derive prediction intervals of
              electricity cost for different time-of-use tariffs. We show that
              a simple strategy of switching between different tariffs, based
              on a comparison of cost densities, delivers significant cost
              savings for the great majority of consumers.",
  journal  = "Omega",
  volume   =  59,
  pages    = "47--59",
  month    =  mar,
  year     =  2016,
  url      = "http://www.sciencedirect.com/science/article/pii/S0305048314001546",
  keywords = "Electricity demand; Forecasting; Nonparametric density
              estimation; Smart meter"
}

@ARTICLE{Hyndman2006-bp,
  title    = "Another look at measures of forecast accuracy",
  author   = "Hyndman, Rob J and Koehler, Anne B",
  abstract = "We discuss and compare measures of accuracy of univariate time
              series forecasts. The methods used in the M-competition as well
              as the M3-competition, and many of the measures recommended by
              previous authors on this topic, are found to be degenerate in
              commonly occurring situations. Instead, we propose that the mean
              absolute scaled error become the standard measure for comparing
              forecast accuracy across multiple time series.",
  journal  = "International Journal of Forecasting",
  volume   =  22,
  number   =  4,
  pages    = "679--688",
  month    =  oct,
  year     =  2006,
  url      = "http://www.sciencedirect.com/science/article/pii/S0169207006000239",
  keywords = "Forecast accuracy; Forecast error measures; Forecast evaluation;
              M-competition; Mean absolute scaled error"
}

@ARTICLE{Hong2016-lo,
  title    = "Probabilistic energy forecasting: {Global} {Energy}
              {Forecasting} {Competition} 2014 and beyond",
  author   = "Hong, Tao and Pinson, Pierre and Fan, Shu and Zareipour,
              Hamidreza and Troccoli, Alberto and Hyndman, Rob J",
  abstract = "The energy industry has been going through a significant
              modernization process over the last decade. Its infrastructure is
              being upgraded rapidly. The supply, demand and prices are
              becoming more volatile and less predictable than ever before.
              Even its business model is being challenged fundamentally. In
              this competitive and dynamic environment, many decision-making
              processes rely on probabilistic forecasts to quantify the
              uncertain future. Although most of the papers in the energy
              forecasting literature focus on point or single-valued forecasts,
              the research interest in probabilistic energy forecasting
              research has taken off rapidly in recent years. In this paper, we
              summarize the recent research progress on probabilistic energy
              forecasting. A major portion of the paper is devoted to
              introducing the Global Energy Forecasting Competition 2014
              (GEFCom2014), a probabilistic energy forecasting competition with
              four tracks on load, price, wind and solar forecasting, which
              attracted 581 participants from 61 countries. We conclude the
              paper with 12 predictions for the next decade of energy
              forecasting.",
  journal  = "International Journal of Forecasting",
  volume   =  32,
  number   =  3,
  pages    = "896--913",
  year     =  2016,
  url      = "http://dx.doi.org/10.1016/j.ijforecast.2016.02.001",
  doi      = "10.1016/j.ijforecast.2016.02.001",
  keywords = "Electric load forecasting; Electricity price forecasting;
              Forecasting competition; Probabilistic forecasting; Solar power
              forecasting; Wind power forecasting"
}

@ARTICLE{Gajowniczek2014-ek,
  title     = "Short Term Electricity Forecasting Using Individual Smart Meter
               Data",
  author    = "Gajowniczek, Krzysztof and Ząbkowski, Tomasz",
  journal   = "Procedia Computer Science",
  publisher = "Elsevier",
  volume    =  35,
  pages     = "589--597",
  year      =  2014,
  url       = "http://linkinghub.elsevier.com/retrieve/pii/S1877050914011053"
}

@INPROCEEDINGS{Ghofrani2011-tb,
  title     = "Smart meter based short-term load forecasting for residential
               customers",
  booktitle = "{NAPS} 2011 -- 43rd North American Power Symposium",
  author    = "Ghofrani, M and Hassanzadeh, M and Etezadi-Amoli, M and Fadali,
               M S",
  abstract  = "This paper examines the potential impact of automatic meter
               reading (AMR) on short-term load forecasting for a residential
               customer. Real-time measurement data from customers' smart
               meters provided by a utility company is modeled as the sum of a
               deterministic component and a Gaussian noise signal. The shaping
               filter for the Gaussian noise is calculated using spectral
               analysis. Kalman filtering is then used for load prediction. The
               accuracy of the proposed method is evaluated for different
               sampling periods and planning horizons. The results show that
               the availability of more real-time measurement data improves the
               accuracy of the load forecast significantly. However, the
               improved prediction accuracy can come at a high computational
               cost. Our results qualitatively demonstrate that achieving the
               desired prediction accuracy while avoiding a high computational
               load requires limiting the volume of data used for prediction.
               Consequently, the measurement sampling rate must be carefully
               selected as a compromise between these two conflicting
               requirements.",
  publisher = "IEEE",
  pages     = "1--5",
  month     =  aug,
  year      =  2011,
  url       = "http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=6025124",
  keywords  = "Kalman filtering; residential load; shaping filter; smart meter;
               spectral analysis"
}

@ARTICLE{Hyndman2010-ui,
  title    = "Density forecasting for long-term peak electricity demand",
  author   = "Hyndman, Rob J and Fan, Shu",
  abstract = "Long-term electricity demand forecasting plays an important role
              in planning for future generation facilities and transmission
              augmentation. In a long-term context, planners must adopt a
              probabilistic view of potential peak demand levels. Therefore
              density forecasts (providing estimates of the full probability
              distributions of the possible future values of the demand) are
              more helpful than point forecasts, and are necessary for
              utilities to evaluate and hedge the financial risk accrued by
              demand variability and forecasting uncertainty. This paper
              proposes a new methodology to forecast the density of long-term
              peak electricity demand. Peak electricity demand in a given
              season is subject to a range of uncertainties, including
              underlying population growth, changing technology, economic
              conditions, prevailing weather conditions (and the timing of
              those conditions), as well as the general randomness inherent in
              individual usage. It is also subject to some known calendar
              effects due to the time of day, day of week, time of year, and
              public holidays. A comprehensive forecasting solution is
              described in this paper. First, semi-parametric additive models
              are used to estimate the relationships between demand and the
              driver variables, including temperatures, calendar effects and
              some demographic and economic variables. Then the demand
              distributions are forecasted by using a mixture of temperature
              simulation, assumed future economic scenarios, and residual
              bootstrapping. The temperature simulation is implemented through
              a new seasonal bootstrapping method with variable blocks. The
              proposed methodology has been used to forecast the probability
              distribution of annual and weekly peak electricity demand for
              South Australia since 2007. The performance of the methodology is
              evaluated by comparing the forecast results with the actual
              demand of the summer 2007-2008.",
  journal  = "IEEE Transactions on Power Systems",
  volume   =  25,
  number   =  2,
  pages    = "1142--1153",
  month    =  may,
  year     =  2010,
  url      = "http://ieeexplore.ieee.org/document/5345698/",
  keywords = "Density forecast; Long-term demand forecasting; Simulation; Time
              series"
}

@ARTICLE{Fan2012-bs,
  title    = "Short-term load forecasting based on a semi-parametric additive
              model",
  author   = "Fan, Shu and Hyndman, Rob J",
  abstract = "Short-term load forecasting is an essential instrument in power
              system planning, operation, and control. Many operating decisions
              are based on load forecasts, such as dispatch scheduling of
              generating capacity, reliability analysis, and maintenance
              planning for the generators. Overestimation of electricity demand
              will cause a conservative operation, which leads to the start-up
              of too many units or excessive energy purchase, thereby supplying
              an unnecessary level of reserve. On the other hand,
              underestimation may result in a risky operation, with
              insufficient preparation of spinning reserve, causing the system
              to operate in a vulnerable region to the disturbance. In this
              paper, semi-parametric additive models are proposed to estimate
              the relationships between demand and the driver variables.
              Specifically, the inputs for these models are calendar variables,
              lagged actual demand observations, and historical and forecast
              temperature traces for one or more sites in the target power
              system. In addition to point forecasts, prediction intervals are
              also estimated using a modified bootstrap method suitable for the
              complex seasonality seen in electricity demand data. The proposed
              methodology has been used to forecast the half-hourly electricity
              demand for up to seven days ahead for power systems in the
              Australian National Electricity Market. The performance of the
              methodology is validated via out-of-sample experiments with real
              data from the power system, as well as through on-site
              implementation by the system operator.",
  journal  = "IEEE Transactions on Power Systems",
  volume   =  27,
  number   =  1,
  pages    = "134--141",
  month    =  feb,
  year     =  2012,
  url      = "http://ieeexplore.ieee.org/document/5985500/",
  keywords = "Additive model; forecast distribution; short-term load
              forecasting; time series"
}

@INPROCEEDINGS{Ben_Taieb2017-ok,
  title      = "Regularization in Hierarchical Time Series Forecasting With
                Application to Electricity Smart Meter Data",
  booktitle  = "{Thirty-First} {AAAI} Conference on Artificial Intelligence",
  author     = "Ben Taieb, Souhaib and Yu, Jiafan and Neves Barreto, Mateus and
                Rajagopal, Ram",
  abstract   = "Accurate electricity demand forecast plays a key role in
                sustainable power systems. It enables better decision making
                in the planning of electricity generation and distribution for
                many use cases. The electricity demand data can often be
                represented in a hierarchical structure. For example, the
                electricity consumption of a whole country could be
                disaggregated by states, cities, and households. Hierarchical
                forecasts require not only good prediction accuracy at each
                level of the hierarchy, but also the consistency between
                different levels. State-of-the-art hierarchical forecasting
                methods usually apply adjustments on the individual level
                forecasts to satisfy the aggregation constraints. However, the
                high-dimensionality of the unpenalized regression problem and
                the estimation errors in the high-dimensional error covariance
                matrix can lead to increased variability in the revised
                forecasts with poor prediction performance. In order to
                provide more robustness to estimation errors in the
                adjustments, we present a new hierarchical forecasting
                algorithm that computes sparse adjustments while still
                preserving the aggregation constraints. We formulate the
                problem as a high-dimensional penalized regression, which can
                be efficiently solved using cyclical coordinate descent
                methods. We also conduct experiments using a large-scale
                hierarchical electricity demand data. The results confirm the
                effectiveness of our approach compared to state-of-the-art
                hierarchical forecasting methods, in both the sparsity of the
                adjustments and the prediction accuracy. The proposed approach
                to hierarchical forecasting could be useful for energy
                generation including solar and wind energy, as well as numerous
                other applications.",
  year       =  2017,
  conference = "AAAI Conference on Artificial Intelligence"
}