ModErn Text Analysis
META Enumerates Textual Applications
Functions
profile.cpp File Reference
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>
#include "meta/analyzers/analyzer.h"
#include "meta/analyzers/tokenizers/icu_tokenizer.h"
#include "meta/analyzers/filters/all.h"
#include "meta/analyzers/ngram/ngram_word_analyzer.h"
#include "meta/corpus/document.h"
#include "cpptoml.h"
#include "meta/io/filesystem.h"
#include "meta/parser/sr_parser.h"
#include "meta/sequence/io/ptb_parser.h"
#include "meta/sequence/perceptron.h"
#include "meta/sequence/sequence.h"
#include "meta/util/shim.h"

Functions

int print_usage (const std::string &prog)
 Prints help for this executable. More...
 
std::string no_ext (const std::string &file)
 
template<class Stream >
void write_file (Stream &stream, const std::string &in_name, const std::string &out_name)
 
void stem (const std::string &file, const cpptoml::table &)
 Performs stemming on a text file. More...
 
void stop (const std::string &file, const cpptoml::table &config)
 Performs stopword removal on a text file. More...
 
void pos (const std::string &file, const cpptoml::table &config, bool replace)
 Performs part-of-speech tagging on a text file. More...
 
void parse (const std::string &file, const cpptoml::table &config)
 Parses all sentences in a text file.
 
void freq (const std::string &file, cpptoml::table &config, int64_t n)
 Performs frequency analysis on a text file. More...
 
int main (int argc, char *argv[])
 

Detailed Description

Author
Sean Massung

Function Documentation

§ print_usage()

int print_usage ( const std::string &  prog)

Prints help for this executable.

Parameters
prog: The name of the current executable
Returns
the exit code for this program

§ no_ext()

std::string no_ext ( const std::string &  file)
Parameters
file: The filename to modify
Returns
the base filename without an extension

§ write_file()

template<class Stream >
void write_file ( Stream &  stream,
const std::string &  in_name,
const std::string &  out_name 
)
Parameters
stream: Token stream to read from
in_name: Input filename
out_name: Output filename

§ stem()

void stem ( const std::string &  file,
const cpptoml::table &   
)

Performs stemming on a text file.

Parameters
file: The input file
config: Configuration settings (this parameter is left unnamed in the function signature)

§ stop()

void stop ( const std::string &  file,
const cpptoml::table &  config 
)

Performs stopword removal on a text file.

Parameters
file: The input file
config: Configuration settings

§ pos()

void pos ( const std::string &  file,
const cpptoml::table &  config,
bool  replace 
)

Performs part-of-speech tagging on a text file.

Parameters
file: The input file
config: Configuration settings
replace: Whether or not to replace words with their POS tags

§ freq()

void freq ( const std::string &  file,
cpptoml::table &  config,
int64_t  n 
)

Performs frequency analysis on a text file.

Parameters
file: The input file
config: Configuration settings
n: The n-gram value to use in tokenization