 nerv/doc/source/conf.py      |  18
 nerv/doc/source/overview.rst | 127
 nerv/doc/source/user.rst     |   6
 3 files changed, 140 insertions(+), 11 deletions(-)
diff --git a/nerv/doc/source/conf.py b/nerv/doc/source/conf.py
index 4d6119f..c17c333 100644
--- a/nerv/doc/source/conf.py
+++ b/nerv/doc/source/conf.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Nerv documentation build configuration file, created by
+# NERV documentation build configuration file, created by
 # sphinx-quickstart on Thu Jun 25 19:40:07 2015.
 #
 # This file is execfile()d with the current directory set to its
@@ -50,9 +50,9 @@ source_suffix = '.rst'
 master_doc = 'index'
 
 # General information about the project.
-project = u'Nerv'
-copyright = u'2015, Ted Yin, Tianxing He'
-author = u'Ted Yin, Tianxing He'
+project = u'NERV'
+copyright = u'2015, SJTU SpeechLab'
+author = u'SJTU SpeechLab'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@@ -204,7 +204,7 @@ html_static_path = ['_static']
 #html_search_scorer = 'scorer.js'
 
 # Output file base name for HTML help builder.
-htmlhelp_basename = 'Nervdoc'
+htmlhelp_basename = 'nervdoc'
 
 
 # -- Options for LaTeX output ---------------------------------------------
@@ -226,7 +226,7 @@ latex_elements = {
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
 latex_documents = [
-    (master_doc, 'Nerv.tex', u'Nerv Documentation',
+    (master_doc, 'nerv.tex', u'NERV Documentation',
      u'Ted Yin, Tianxing He', 'manual'),
 ]
 
@@ -256,7 +256,7 @@ latex_documents = [
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
 man_pages = [
-    (master_doc, 'nerv', u'Nerv Documentation',
+    (master_doc, 'nerv', u'NERV Documentation',
      [author], 1)
 ]
 
@@ -270,8 +270,8 @@ man_pages = [
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
-    (master_doc, 'Nerv', u'Nerv Documentation',
-     author, 'Nerv', 'One line description of project.',
+    (master_doc, 'nerv', u'NERV Documentation',
+     author, 'NERV', 'One line description of project.',
      'Miscellaneous'),
 ]
diff --git a/nerv/doc/source/overview.rst b/nerv/doc/source/overview.rst
new file mode 100644
index 0000000..ccdd2ef
--- /dev/null
+++ b/nerv/doc/source/overview.rst
@@ -0,0 +1,127 @@
+Overview
+========
+
+What is NERV?
+-------------
+
+NERV is a general-purpose deep learning toolkit designed to be simple,
+lightweight, and extensible. The name "NERV" comes from the German word
+"Nerv", which means "nerve" in English. It is also the name of a fictional
+organization in *Neon Genesis Evangelion*, a famous anime.
+
+
+Why NERV?
+---------
+
+In recent years, the invention and prevalence of deep neural networks (DNNs)
+and related deep learning techniques have given rise to many tools and
+toolkits for constructing and training neural networks, which facilitate and
+routinize the research cycle of deep learning applied to areas such as speech
+processing, computer vision, and natural language processing. By design, such
+tools and toolkits fall into two categories: task-specific and
+general-purpose.
+
+The first category addresses deep learning in a direct way. These tools
+usually have a very specific goal: they support a certain type of neural
+network model and provide peripheral facilities dedicated to one specific
+task, such as image classification or phone state prediction.
+Not only is the network model hard-coded into the very core of the tool, but
+modifications and tricks specific to a certain task are also wired into the
+code. The effect of this approach is two-fold. On the one hand, such tools
+have a complete and tightly designed system that provides a simple user
+interface, so researchers who are familiar with that specific area can use
+them easily. Also, because the network is hard-coded and task-specific, the
+implementation can be kept simple and the optimization effective, which
+offers very fast running speed (such as CURRENNT). On the other hand, this
+usability and efficiency come at the cost of reusability and flexibility.
+Users need to hack the code and make modifications to fit the tool to a
+different network structure, and it is very difficult and tricky to apply a
+tool designed for one specific area (for example, image classification) to
+another area (like speech recognition); engineering details and
+implementation tricks keep people from doing so. Caffe, which is designed
+for computer vision, has a comprehensive set of tools for processing images
+and training convolutional neural networks (CNNs), but it cannot be directly
+applied to speech processing tasks. Luckily, there is Kaldi, a counterpart
+in speech processing that can process waveform files, extract acoustic
+features, and train GMM-HMM models, fully-connected DNN models, LSTM models,
+and so on.
+
+The second category strives to be general-purpose. As mentioned above, there
+are two types of generality:
+
+- generality among different network structures
+- generality among different tasks
+
+The advantage of such a general approach is obvious: we can train different
+network structures as long as the required basic computation units are
+provided by the toolkit. Besides, general-purpose toolkits usually have a
+unified interface for data input and output (I/O), which is the most
+task-specific part, so that task-specific implementation details are
+confined to separate I/O modules as far as possible. This design concept is
+also present in operating systems, where device-specific code is modularized
+and isolated from the core, often behind several abstraction layers. By this
+approach, general-purpose toolkits can usually be adapted to various kinds
+of tasks without hacking or changing the core code. Admittedly, this
+generality comes at the cost of implementation simplicity and efficiency.
+However, the overhead brought by abstraction and a relatively complex design
+is not a huge problem given the importance of those benefits: over recent
+years, many new network structures have been proposed, examined, and applied
+to various kinds of tasks, and given this trend and the difficulty of
+hacking or modifying task-specific tools, the benefits of generality
+outweigh those concerns.
+
+There are several well-known and successful general-purpose deep learning
+toolkits, each with its own strengths and weaknesses.
+
+Theano is a numerical computation library for Python. It supports mainstream
+neural network structures such as fully-connected DNNs, CNNs, and recurrent
+neural networks (RNNs) along with variants like the long short-term memory
+network (LSTM). It also has a short learning curve once the user becomes
+familiar with symbolic computation, and it appears friendly to new users.
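+
+As a minimal illustrative sketch of this symbolic style (the variable names
+below are our own, not taken from Theano's documentation), a computation is
+first declared as a symbolic expression and only turned into executable code
+when ``theano.function`` compiles it::
+
+    import theano
+    import theano.tensor as T
+
+    x = T.dmatrix('x')               # symbolic input mini-batch
+    W = T.dmatrix('W')               # symbolic weight matrix
+    y = T.nnet.sigmoid(T.dot(x, W))  # builds a graph; nothing runs yet
+    predict = theano.function([x, W], y)  # the graph is compiled here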
+
+However, some concepts may not be very friendly to users (such as "scan"),
+the compilation of the network can take very long for complex models, the
+Python-based environment means the toolkit cannot be lightweight, and the
+large runtime footprint makes it hard to port the toolkit to embedded
+environments (where resources are very limited). Here comes a strong
+competitor: Torch.
+
+Torch is an open source machine learning library whose initial release dates
+back to 2002. Its design goal is similar: to provide a "MATLAB-like"
+low-level computation library that comes with separate functional blocks
+which users can assemble to build and train their own networks. Torch has a
+steeper learning curve than Theano, but it is more lightweight than the
+latter: it uses Lua as the scripting language and implements only the
+time-consuming operations in C.
+
+However, Torch is not perfect. Its limitation comes with its advantage:
+there are few general training or network-building patterns inside the
+toolkit. Torch plays a role more like MATLAB, so users need to write their
+own code to put everything together: data I/O, mini-batching, training
+scheduling, and so on. None of these is a trivial task, yet each is
+re-implemented by every user in their own scripts. This can lead to a
+situation where every user maintains a private code base, and on this level
+it degrades Torch to a task-specific tool.
+
+Imagine a user who wants to build and train a state-of-the-art LSTM model
+for acoustic modeling and needs to read from pre-existing feature files
+extracted by a popular speech processing framework like HTK or Kaldi. She
+has to implement the data I/O all on her own. Moreover, she has to implement
+mini-batching, network unrolling, BPTT, and so on, to deal with loops and to
+schedule the training. What's worse, when another user wants to train a
+different model for the same task, or the same model for a different task,
+he has two choices: write his own training script or copy the script from
+the previous person. Neither choice is ideal, because each turns scripting
+into reinventing the wheel or hacking someone else's code, which goes
+against the goal of a general-purpose toolkit. In fact, Torch seemingly
+moves towards a more distributed and isolated development style, which can
+ease project management, but it also implies less collaboration: people no
+longer work on a common code base and instead write their own scripts with
+duplicate functionality, so there is less and less code shared among users'
+scripts.
+
+NERV is designed to address these problems, with the following goals:
+
+- simplicity: the learning curve is not steep and the code is
+  straightforward
+- extensibility: users can quickly become developers and add missing modules
+  or tailor the toolkit to their needs; the major building blocks in NERV
+  are modularized and their interfaces are standardized, so that users can
+  plug in their own implementations and even use modules implemented by
+  others
+- lightweight: NERV strives to keep its core code base and dependencies
+  minimal, which makes it fairly easy to embed into other task-specific
+  tools, such as Kaldi (in speech processing).
diff --git a/nerv/doc/source/user.rst b/nerv/doc/source/user.rst
index 48f0c69..eb85deb 100644
--- a/nerv/doc/source/user.rst
+++ b/nerv/doc/source/user.rst
@@ -1,5 +1,7 @@
 User Manual
 ===========
 
-- Overview
-- `NERV Lua Reference <lua/>`_
+.. toctree::
+
+   overview
+   NERV Lua Reference <lua/#http://>