88 #define ANYEXTRACT_VERSION \ 89 "ANYEXTRACT V1.27 extract data files, using index file" 97 #include <aff/seriesoperators.h> 98 #include <tfxx/misc.h> 99 #include <tfxx/commandline.h> 100 #include <tfxx/stringfunc.h> 101 #include <tfxx/error.h> 102 #include <tfxx/seitosh.h> 103 #include <datrwxx/tracereader.h> 104 #include <libtime++.h> 105 #include <tfxx/regexx.h> 106 #include <datrwxx/formats.h> 107 #include <datrwxx/error.h> 108 #include <datrwxx/writeany.h> 179 "ERROR (Series): other type is already slected!\n" 180 "internal inconsistency to be considered a bug");
187 "ERROR (Series): other type is already slected!\n" 188 "internal inconsistency to be considered a bug");
195 "ERROR (Series): other type is already slected!\n" 196 "internal inconsistency to be considered a bug");
203 TFXX_assert(this->
selected(),
"ERROR (Series): no type is selected!\n" 204 "internal inconsistency to be considered a bug");
206 { retval=
Mds.shape(); }
208 { retval=
Mis.shape(); }
210 { retval=
Mfs.shape(); }
214 {
return this->
shape().first(); }
216 {
return this->
shape().last(); }
306 std::string timestring=entry.
wid2.date.timestring();
307 os << timestring.substr(4,23) <<
" ";
308 os << entry.
wid2.station <<
" ";
309 os << entry.
wid2.channel <<
" ";
310 os << entry.
wid2.auxid <<
" ";
311 os << entry.
wid2.instype <<
" ";
313 os << entry.
itrace <<
" ";
358 int main(
int iargc,
char* argv[])
365 "usage: anyextract [common options] [extract options]\n" 366 " index [ index [...] ] outfile\n" 367 " or: anyextract -GAPFIND [common options] [gap options]\n" 368 " index [ index [...] ]\n" 369 " or: anyextract --help|-h|-xhelp" "\n" 371 "common options are:\n" 372 " [-verbose] [-DEBUG] [-dump]\n" 373 " [-type f] [-overwrite]\n" 374 " [-sfirst date] [-slast date] [-sduration dur]\n" 375 " [-sstation regex] [-schannel regex]\n" 376 " [-sinstrument regex] [-sauxid regex]\n" 377 " [-dtr f] [-tr] [-br]\n" 378 "extract options are:\n" 379 " [-GSE] [-int] [-float] [-nofree]" "\n" 380 " [-calib f] [-calper p]" "\n" 383 " [-GAPFIND] [-Ggs f] [-Gcs f] [-Gprint[=file]]" "\n" 384 " [-Gbin s] [-Gsummarize l] [-Ggpt f]" "\n" 390 "read index file(s) and extract defined parts of the dataset" "\n" 392 "You have to use \"anyindex\" first to create one or more index" "\n" 393 "files referencing the data files. \"anyextract\" will scan these" "\n" 394 "index files and decide from which data files the requested" "\n" 395 "dataset can be compiled. It will arrange this information" "\n" 396 "and build chains of input datasets for each unique stream" "\n" 397 "(defined by station, channel, instrument and auxid) and will" "\n" 398 "then read and compile the time series for the output file." "\n" 400 "The input file type including file type modifiers is defined in the\n" 401 "index file. The output file type as well as the data type (float,\n" 402 "double, or int) used for copying samples can be selcted on the\n" 405 "The program can be executed in two differen modes:\n" 406 "1. data extraction: reads the index and assembles data\n" 407 "2. gap analysis: reads the index and evaluates gaps in the data\n" 409 "Command line arguments and options:\n" 410 " (command line options may be abbreviated, i.e. \"-v\", \"-verb\",\n" 411 " and \"-verbose\" are all equivalent)\n" 413 "-help print online help\n" 414 "-xhelp provides extended online help (explains data formats etc.)\n" 416 "index ... index file(s)" "\n" 418 "Options common to both modes:\n" 419 " -verbose be verbose" "\n" 420 " -DEBUG debugging output (be very verbose)" "\n" 421 " -dump dump intermediate results" "\n" 423 " -overwrite overwrite existing output files" "\n" 426 " -sfirst date date and time of first sample to extract\n" 427 " -slast date date and time of last sample to extract\n" 428 " -sduration dur define end of time window by duration \"dur\"\n" 429 " -sstation regex select station by regular expression \"regex\"\n" 430 " -schannel regex select channel by regular expression \"regex\"\n" 431 " -sinstrument regex select instrument by regular expression \"regex\"\n" 432 " -sauxid regex select auxid by regular expression \"regex\"\n" 434 " Options to control the handling of inconsitencies:\n" 435 " -dtr f allow tolerance when matching times of consecutive\n" 436 " data blocks; ''f'' defines the tolerance as a fraction\n" 437 " of the sampling interval, that is tolerated for the\n" 438 " residual of sample times\n" 439 " Notice: the tolerance is tested against the residual\n" 440 " of consecutive input blocks, not against the residual\n" 441 " of all extracted samples and the next input block\n" 442 " -br redundant input samples are a break condition;\n" 443 " program does not abort\n" 444 " -tr tolerate redundant samples; i.e. do not abort if input\n" 445 " blocks are overlapping, which means that the same\n" 446 " exists twice in different input blocks\n" 447 " this clears the -br option if set\n" 449 " Option to select file type for time series output:\n" 450 " -type f write output file of format \"f\"\n" 451 " in GAPFIND mode this is used together with options\n" 452 " \"-Ggs\" and \"-Gcs\"\n" 454 "Options controlling data extraction:\n" 455 " outfile output file (do not specify this in gap finding" "\n" 456 " mode, i.e. together with option -GAPFIND)" "\n" 458 " Options to select file type for time series output:\n" 459 " -type f write output file of format \"f\"\n" 460 " -GSE write GSE compatible data" "\n" 461 " setting this option implies option -int" "\n" 462 " and option \"-type gse\"" "\n" 463 " -int use integer data type for copying of samples" "\n" 464 " -float use float (single precision) data type for copying of\n" 467 " Options to adjust content of headers in output files:\n" 468 " -calib f set calib entry in output file to f" "\n" 469 " see notes below" "\n" 470 " -calper p set calper entry in output file to p" "\n" 471 " -nofree do not write FREE blocks, even if output format supports\n" 472 " FREE blocks (just speeds up data I/O of subsequent\n" 475 " Option for debugging purposes:\n" 476 " -RANGECHECK perform array index range check (debug option)" "\n" 478 "Options controlling gap analysis:\n" 479 " -GAPFIND find only gaps - do not extract data\n" 480 " do not specify an output file in this case\n" 481 " the -verbose option increases the verbosity of gap\n" 483 " -Gbin s set bin size for gap and completeness series\n" 484 " -Ggs f gap distribution will be written to file \"f\"\n" 485 " the output file is a time series file in format\n" 486 " specified by option \"-type f\"\n" 487 " it specifies the number of missing samples in each bin\n" 488 " as specified by option \"-Gbin s\"\n" 489 " -Gcs f completeness distribution will be written to file \"f\"\n" 490 " the output file is a time series file in format\n" 491 " specified by option \"-type f\"\n" 492 " it specifies the completeness of the data in each bin\n" 493 " as specified by option \"-Gbin s\"in per cent, where\n" 494 " 100% means that no sample is missing\n" 495 " -Gprint[=f] print a list of gaps to stdout\n" 496 " if \"f\" is defined, write this list to file \"f\"\n" 497 " -Gsummarize l set summerize level for -Gprint to \"l\"\n" 498 " l=1: summarize results for each stream\n" 499 " l=2: summarize total results only\n" 500 " l=3: just output total number of gaps\n" 501 " l=4: just output total number of missing samples\n" 502 " -Ggpt f create gnuplot plot file with basename \"f\"\n" 503 " the filename will be complemented by the extension\n" 504 " \".gpt\"; by running gnuplot on this file, a graphical\n" 505 " display of missing data organized in bins of the size\n" 506 " specified by option \"-Gbin s\" is created in a\n" 507 " Postscript file with basename \"f\" and extension \".ps\"\n" 508 " Notice: Sample index values typically are represented by variables\n" 509 " of type int (integers of 4 bytes). The maximum number of\n" 510 " samples which can by handled that way is slightly more than\n" 511 " 2.e9, which is fully sufficient when handling actual time\n" 513 " When doing gap-analysis you may run into cases where\n" 514 " this range is exceeded. If sampling rate is larger than 68 sps\n" 515 " the total number of samples for one year exceeds the value\n" 516 " range. Take in mind, that such cases might not be handled\n" 518 " This limitation is imposed on the program by the used software\n" 519 " libraries and cannot easily be overcome.\n" 523 char extended_help_text[]=
525 "calib and calper" "\n" 526 "----------------" "\n" 527 " The definition of the parameters calib and calper in the GSE2.0" "\n" 528 " format specification is ambigious. The format caontains no field" "\n" 529 " that is able to specify whether the sensor has the response of" "\n" 530 " an electrodynamic seismometer or an accelerometer, but the" "\n" 531 " conversion factor calib is given in units of displacement." "\n" 532 " It is not specified, whether the factor has to be applied" "\n" 533 " with deconvolution (which would be reasonable, since units" "\n" 534 " are given relative to displacement) or without deconvolution" "\n" 535 " as is common practice. For this reason there exists only a kind" "\n" 536 " of convention how this fields are used. Since these fields at least" "\n" 537 " require the knowledge of the instrument's response, the program" "\n" 538 " usually sets both parameters to the default value -1. in the" "\n" 539 " output data. But beware: Some programs like SeismicHandler" "\n" 540 " rely on these fields, when reading GSE data. They will reject" "\n" 541 " the default -1. and replace the value by 1., which results in" "\n" 542 " a scaling of amplitudes with a factor 2*pi which is usually" "\n" 543 " not desired. In these cases it is save to choose" "\n" 544 " -calib 1. -calper 6.2831853" "\n" 546 " -calib 0.15915494 -calper 1." "\n" 547 " With these settings, the input data will not be scaled when" "\n" 548 " read into SeismicHandler." "\n" 552 using namespace tfxx::cmdline;
553 static Declare options[]=
558 {
"verbose",arg_no,
"-"},
560 {
"overwrite",arg_no,
"-"},
562 {
"sfirst",arg_yes,
"-"},
564 {
"slast",arg_yes,
"-"},
566 {
"sstation",arg_yes,
".*"},
568 {
"schannel",arg_yes,
".*"},
570 {
"sinstrument",arg_yes,
".*"},
572 {
"sauxid",arg_yes,
".*"},
574 {
"DEBUG",arg_no,
"-"},
576 {
"RANGECHECK",arg_no,
"-"},
578 {
"GAPFIND",arg_no,
"-"},
580 {
"dtr",arg_yes,
"0."},
588 {
"sduration",arg_yes,
"-"},
594 {
"calib",arg_yes,
"-1."},
596 {
"calper",arg_yes,
"-1."},
598 {
"xhelp",arg_no,
"-"},
600 {
"nofree",arg_no,
"-"},
602 {
"type",arg_yes,
"sff"},
604 {
"float",arg_no,
"-"},
606 {
"Gbin",arg_yes,
"0/1"},
608 {
"Gprint",arg_opt,
"-"},
614 {
"Gsummarize",arg_yes,
"0"},
616 {
"Ggpt",arg_yes,
"-"},
623 cerr << usage_text << endl;
624 cerr << tfxx::seitosh::repository_reference << endl;
629 Commandline cmdline(iargc, argv, options);
632 if (cmdline.optset(0) || cmdline.optset(21))
634 cerr << usage_text << endl;
635 cerr << help_text << endl;
636 datrw::supported_data_types(cerr);
637 if (cmdline.optset(21))
639 cerr << endl << extended_help_text << endl;
640 datrw::online_help(cerr);
642 cerr << libtime::usage_time_format_string << endl;
644 cerr << endl << tfxx::seitosh::repository_reference << endl;
654 if (opt.
firstset) { opt.
first=libtime::TAbsoluteTime(cmdline.string_arg(3)); }
655 if (opt.
lastset) { opt.
last=libtime::TAbsoluteTime(cmdline.string_arg(4)); }
660 opt.
debug=cmdline.optset(9);
670 if (cmdline.optset(16))
673 "ERROR: you must specify the beginning of the time window too");
675 "ERROR: conflicting definitions for time window");
677 opt.
last=opt.
first+libtime::TRelativeTime(cmdline.string_arg(16));
679 opt.
integerdata=(cmdline.optset(17)||cmdline.optset(18));
682 opt.
newcalib=cmdline.double_arg(19);
688 opt.
binsize=cmdline.string_arg(25);
695 opt.
summarizelevel=
static_cast<unsigned int>(cmdline.int_arg(29));
715 cout <<
"GSE output selected: ";
718 cout <<
"set output format to \"" << opt.
outputformat <<
"\" ";
719 cout <<
"set internal data type to integer" << endl;
725 TFXX_assert(cmdline.extra(),
"ERROR: missing index file!");
726 outfile=cmdline.next();
728 while (cmdline.extra())
731 infiles.push_back(outfile);
732 outfile=cmdline.next();
737 infiles.push_back(outfile);
741 cout <<
"SPECIAL MODE: find gaps in input data - do not extract data" 747 TFXX_assert((infiles.size()>0),
"ERROR: missing output file!");
755 cout <<
"DUMP stage 0 results: names of index files to process" << endl;
756 Tvecofstrings::const_iterator iv=infiles.begin();
757 while (iv!=infiles.end())
759 cout <<
" " << *iv << endl;
762 cout <<
" output will be written to:" << endl
763 <<
" " << outfile << endl;
793 cout <<
"step 1: collect matching index entries" << endl;
794 cout <<
"data selection:" << endl;
796 { cout <<
" first sample: " << opt.
first.timestring() << endl; }
798 { cout <<
" first sample: no time specified" << endl; }
800 { cout <<
" last sample: " << opt.
last.timestring() << endl; }
802 { cout <<
" last sample: no time specified" << endl; }
804 cout <<
" duplicate input samples are tolerated" << endl;
806 cout <<
" duplicate input samples are a break condition" << endl;
808 cout <<
" sampling raster tolerance is " 810 <<
" of sampling interval" << endl;
817 "ERROR: selected time window makes no sense!");
829 cout <<
" station: " << rgxx.
station.expression() << endl;
830 cout <<
" channel: " << rgxx.
channel.expression() << endl;
831 cout <<
" instrument: " << rgxx.
instrument.expression() << endl;
832 cout <<
" auxid: " << rgxx.
auxid.expression() << endl;
839 Tvecofstrings::const_iterator infile=infiles.begin();
840 while( infile!=infiles.end())
843 { cout <<
"** open next index file: " << *infile << endl; }
844 std::ifstream is(infile->c_str());
854 { cout <<
"DEBUG: read next entry:" << endl; }
857 std::string formatID;
861 getline(is, formatID);
862 indexentry.
wid2.read(is);
865 cout <<
" filename: " << indexentry.
filename << endl;
866 cout <<
" trace #: " << indexentry.
itrace << endl;
867 cout <<
" data format: " <<
869 cout <<
" " << indexentry.
wid2.line();
875 { match=(opt.
first<=sff::wid2lastsample(indexentry.
wid2)); }
877 { match=(opt.
last>=indexentry.
wid2.date); }
878 if (match) { match=rgxx.
station.match(indexentry.
wid2.station); }
879 if (match) { match=rgxx.
channel.match(indexentry.
wid2.channel); }
880 if (match) { match=rgxx.
instrument.match(indexentry.
wid2.instype); }
881 if (match) { match=rgxx.
auxid.match(indexentry.
wid2.auxid); }
885 collection.push_back(indexentry);
888 cout <<
" match: " << indexentry << endl;
898 cout <<
" scanned " << nscanned <<
" entries in index file" << endl;
904 cout <<
"found " << collection.size() <<
" matching entries" << endl;
912 cout <<
"DUMP stage 1 results: all matching entries" << endl;
913 Tvecofindexentries::const_iterator iv=collection.begin();
914 while(iv != collection.end())
917 const sff::WID2& wid2=entry.
wid2;
918 cout <<
" " << wid2.station
919 <<
" " << wid2.channel
921 <<
" " << wid2.instype
922 <<
" dt=" << wid2.dt <<
"s" 923 <<
" " << wid2.date.timestring().substr(4,23)
924 <<
" n=" << wid2.nsamples
955 cout <<
"step 2: sort collected index entries" << endl;
960 equalstreams(sff::Fstation|sff::Fchannel|sff::Fdt|sff::Fauxid);
963 if (opt.
verbose) { cout <<
" fill raw chains" << endl; }
965 for (
unsigned int ientry=0; ientry<collection.size(); ++ientry)
971 newlist.push_back(ientry);
972 chain.push_back(newlist);
978 for (
unsigned int ilist=0; ilist<chain.size(); ++ilist)
980 if (equalstreams(collection[ientry].wid2,
981 collection[chain[ilist].front()].wid2))
988 newlist.push_back(ientry);
989 chain.push_back(newlist);
993 chain[imatch].push_back(ientry);
999 if (opt.
verbose) { cout <<
" sort chain(s) for " 1000 << chain.size() <<
" unique stream(s)" << endl; }
1002 for (
unsigned int ilist=0; ilist<chain.size(); ++ilist)
1003 { chain[ilist].sort(comparer); }
1008 for (
unsigned int ilist=0; ilist<chain.size(); ++ilist)
1010 const Tintlist& thelist=chain[ilist];
1011 cout <<
" next stream:" << endl;
1012 const IndexEntry& entry=collection[thelist.front()];
1013 cout <<
" station: " << entry.
wid2.station;
1014 cout <<
" channel: " << entry.
wid2.channel;
1015 cout <<
" instrument: " << entry.
wid2.instype;
1016 cout <<
" auxid: " << entry.
wid2.auxid << endl;
1017 Tintlist::const_iterator iinlist=thelist.begin();
1018 while( iinlist!=thelist.end())
1020 cout <<
" " << collection[*iinlist] << endl;
1021 sff::WID2 thewid2=collection[*iinlist].wid2;
1025 << collection[*iinlist].filename
1026 <<
" " << collection[*iinlist].itrace
1027 <<
" " << collection[*iinlist].dataformat << endl;
1028 cout <<
"DEBUG: " << thewid2.line();
1030 cout <<
" " << thewid2.date.timestring()
1031 <<
" - " << sff::wid2lastsample(thewid2).timestring()
1043 cout <<
"DUMP stage 2 results: chains for each channel" << endl;
1044 Tvecofintlist::const_iterator iv=chain.begin();
1045 while (iv != chain.end())
1047 Tintlist::const_iterator il=iv->begin();
1049 const sff::WID2& hw2=head.
wid2;
1051 <<
" " << hw2.channel
1053 <<
" " << hw2.instype
1054 <<
" (dt=" << hw2.dt <<
"s):" << endl;
1055 while (il != iv->end())
1058 libtime::TAbsoluteTime lastdate=sff::wid2lastsample(entry.
wid2);
1064 << entry.
wid2.date.timestring().substr(4,23) <<
" - " 1065 << lastdate.timestring().substr(4,23)
1103 cout <<
"step 3: define copy operations" << endl;
1108 libtime::TRelativeTime tolerancewindow(0);
1114 for (
unsigned int ichain=0; ichain<chain.size(); ++ichain)
1122 const Tintlist& thelist=chain[ichain];
1125 IndexEntry &theentry(collection[thelist.front()]);
1126 cout <<
"scan chain for stream" << endl;
1127 cout <<
" * station: " << theentry.
wid2.station;
1128 cout <<
" * channel: " << theentry.wid2.channel;
1129 cout <<
" * instrument: " << theentry.wid2.instype;
1130 cout <<
" * auxid: " << theentry.wid2.auxid;
1131 cout <<
" * dt=" << theentry.wid2.dt <<
"s" << endl;
1134 libtime::TAbsoluteTime nextdate;
1138 bool newstream=
true;
1142 Tintlist::const_iterator ilist=thelist.begin();
1143 while (ilist!=thelist.end())
1153 cout <<
" * begin a new output trace" << endl;
1155 cout <<
" * process collection entry #" << *ilist <<
": " 1160 << entry.
wid2.date.timestring()
1162 << sff::wid2lastsample(entry.
wid2).timestring() << endl;
1174 bool complete=
false;
1190 nextdate = opt.
first;
1195 nextdate = entry.
wid2.date;
1207 bool gc_not_empty = (!block.empty());
1208 if (!gc_not_empty) { nextdate=entry.
wid2.date; }
1210 bool gc_raster_residual_exceeds_tolerance
1211 = (sff::wid2isamplerest(entry.
wid2,nextdate) > tolerancewindow);
1212 long int gc_sample_index
1213 = sff::wid2isample(entry.
wid2,nextdate);
1214 bool gc_sample_overlap = (gc_sample_index > 0);
1215 bool gc_sample_match = (gc_sample_index == 0);
1216 bool gc_sample_gap = (gc_sample_index < 0);
1217 bool gc_no_new_data = ((nextdate - tolerancewindow)
1218 > sff::wid2lastsample(entry.
wid2));
1224 cout <<
"DEBUG GAP conditions:" << endl;
1226 cout <<
"-1--- block is not empty" <<endl;
1227 if (gc_raster_residual_exceeds_tolerance)
1228 cout <<
"-2--- time raster residual exceeds tolerance" <<endl;
1229 if (gc_no_duplicates_allowed)
1230 cout <<
"-3--- duplicate samples are not allowed" << endl;
1231 if (gc_sample_overlap)
1232 cout <<
"-4--- sample windows overlap" << endl;
1233 if (gc_sample_match)
1234 cout <<
"-5--- sample windows match" << endl;
1236 cout <<
"-6--- sample windows gap" << endl;
1237 cout <<
"----- sample index: " << gc_sample_index << endl;
1248 || gc_raster_residual_exceeds_tolerance
1249 || (gc_no_duplicates_allowed
1250 && gc_sample_overlap)))
1253 if (gc_sample_overlap
1254 && gc_no_duplicates_allowed
1255 && (!gc_break_on_duplicate))
1257 cout <<
"duplicates: " << endl;
1258 cout <<
" next sample requested: " 1259 << nextdate.timestring() << endl;
1260 cout <<
" is later than first sample in input: " 1261 << entry.
wid2.date.timestring() << endl;
1262 cout <<
" when processing entry" << endl;
1263 cout << entry << endl;
1264 TFXX_abort(
"ERROR: duplicate input samples are fatal!");
1269 << entry.
wid2.station <<
"," 1270 << entry.
wid2.channel <<
"," 1271 << entry.
wid2.auxid <<
"): ";
1272 if (nextdate < entry.
wid2.date)
1274 cout << nextdate.timestring() <<
" (expected)" <<
" < " 1275 << entry.
wid2.date.timestring() <<
" (actual)" << endl;
1277 else if ((entry.
wid2.date <= nextdate) &&
1278 (sff::wid2isamplerest(entry.
wid2,nextdate)
1281 cout << nextdate.timestring() <<
" (expected)" <<
" >= " 1282 << entry.
wid2.date.timestring() <<
" (actual)" << endl
1284 << sff::wid2isamplerest(entry.
wid2,nextdate).timestring()
1285 <<
" (residual to sampling raster)" 1286 <<
" > " << tolerancewindow.timestring()
1287 <<
" (tolerance)" << endl;
1299 else if (gc_sample_overlap && gc_no_duplicates_allowed)
1301 cout <<
"duplicates: ";
1302 cout << nextdate.timestring() <<
" (expected)" <<
" > " 1303 << entry.
wid2.date.timestring() <<
" (actual)" << endl;
1307 TFXX_abort(
"ERROR: undefined condition for GAP!");
1312 trace.push_back(block);
1319 else if (gc_no_new_data)
1323 cout <<
" skip this (next requested: " 1324 << nextdate.timestring() <<
")" << endl;
1333 cout <<
" define copy (next requested: " 1334 << nextdate.timestring() <<
")" << endl;
1337 theblock.
ofirst= nextosample;
1343 TFXX_assert((entry.
wid2.date <= (nextdate+tolerancewindow)),
1344 "ERROR: unexpected GAP(1): check program!");
1345 TFXX_assert((sff::wid2isamplerest(entry.
wid2,nextdate)
1346 <= tolerancewindow),
1347 "ERROR: unexpected GAP(2): check program!");
1348 theblock.
ifirst=sff::wid2isample(entry.
wid2,nextdate);
1360 if (sff::wid2lastsample(entry.
wid2)>opt.
last)
1369 nextdate=sff::wid2isample(entry.
wid2,
1374 cout <<
" copy input samples " 1376 <<
" to output samples " 1377 << theblock.
ofirst <<
"-" << nextosample-1
1380 << sff::wid2isample(entry.
wid2,theblock.
ifirst).timestring()
1382 << sff::wid2isample(entry.
wid2,theblock.
ifirst+
1387 block.push_back(theblock);
1393 cout <<
" * time window is complete" << endl;
1395 trace.push_back(block);
1419 { cout <<
"! * time window can not be completed!" << endl; }
1420 trace.push_back(block);
1432 cout <<
"DUMP stage 3 results: trace copy operations" << endl;
1433 Tvecoftraces::const_iterator iv=trace.begin();
1435 while (iv != trace.end())
1438 Tvecofblocks::const_iterator ivb=block.begin();
1439 const IndexEntry& headentry = collection[ivb->ientry];
1440 const sff::WID2& wid2=headentry.
wid2;
1445 cout <<
" " << wid2.station
1446 <<
" " << wid2.channel
1447 <<
" " << wid2.auxid
1448 <<
" " << wid2.instype
1449 <<
" dt=" << wid2.dt <<
"s" 1451 while (ivb != block.end())
1454 const IndexEntry& entry = collection[ivb->ientry];
1455 const sff::WID2& ew2=entry.
wid2;
1459 cout << theblock.
ifirst <<
"-";
1461 cout << lastinputsample <<
" -> ";
1463 cout << theblock.
ofirst <<
"-";
1466 cout <<
" " << ew2.date.timestring().substr(4,23);
1468 cout << sff::wid2isample(ew2,lastinputsample).timestring().substr(4,23);
1496 filefree.append(
"selection:");
1499 filefree.append(std::string(
"* first sample: ")+opt.
first.timestring());
1503 filefree.append(std::string(
"* first sample: any"));
1507 filefree.append(std::string(
"* last sample: ")+opt.
last.timestring());
1511 filefree.append(std::string(
"* last sample: any"));
1513 filefree.append(std::string(
"* station: ")+opt.
selstation);
1514 filefree.append(std::string(
"* channel: ")+opt.
selchannel);
1515 filefree.append(std::string(
"* instrument: ")+opt.
selinstrument);
1516 filefree.append(std::string(
"* auxid: ")+opt.
selauxid);
1518 filefree.append(
"* duplicate input samples are tolerated");
1520 filefree.append(
"* duplicate input samples are a break condition");
1523 std::ostringstream oss;
1524 oss <<
"* sampling raster tolerance is " 1526 <<
" of sampling interval";
1527 filefree.append(oss.str());
1560 cout <<
"step 4: report contiguous data" << endl;
1575 libtime::TAbsoluteTime earliesttime;
1576 libtime::TAbsoluteTime latesttime;
1581 cout <<
"chunks of contiguous data:" << std::endl;
1589 for (
unsigned int itrace=0; itrace<trace.size(); ++itrace)
1591 if (opt.
verbose) { cout <<
"compile trace #" << itrace+1 <<
":" << endl; }
1593 sff::FREE tracefree;
1594 sff::WID2 tracewid2;
1596 tracewid2=collection[thetrace[0].ientry].wid2;
1597 tracewid2.date=sff::wid2isample(tracewid2,thetrace[0].ifirst);
1600 libtime::TRelativeTime dt=libtime::double2time(tracewid2.dt);
1601 libtime::TAbsoluteTime lastdate=tracewid2.date;
1602 long int totalsamples=0;
1603 for (
unsigned int iblock=0; iblock<thetrace.size(); ++iblock)
1605 totalsamples+=thetrace[iblock].nsamples;
1608 lastdate += dt*thetrace[iblock].nsamples;
1610 tracewid2.nsamples=totalsamples;
1612 tracefree.append(
"data from:");
1613 for (
unsigned int iblock=0; iblock<thetrace.size(); ++iblock)
1617 std::ostringstream freeline;
1619 " trace #" << theentry.
itrace <<
1621 tracefree.append(freeline.str());
1626 cout << tracewid2.station <<
" " << tracewid2.channel <<
" " 1627 << tracewid2.auxid <<
" " 1628 << tracewid2.date.timestring().substr(4,26) <<
" - " 1629 << lastdate.timestring().substr(4,26) << std::endl;
1641 if (analyzecompleteness)
1654 earliesttime=opt.
first;
1658 earliesttime=tracewid2.date;
1662 latesttime=opt.
last;
1666 latesttime=lastdate;
1671 if ((!opt.
firstset) && (tracewid2.date < earliesttime))
1673 earliesttime=tracewid2.date;
1675 if ((!opt.
lastset) && (lastdate > latesttime))
1677 latesttime=lastdate;
1683 conti.
first=tracewid2.date;
1684 conti.
last=lastdate;
1685 conti.
dt=libtime::double2time(tracewid2.dt);
1686 conti.
station=tracewid2.station;
1687 conti.
channel=tracewid2.channel;
1688 conti.
auxid=tracewid2.auxid;
1689 contiguouslist.push_back(conti);
1700 if (analyzecompleteness)
1702 TFXX_debug(opt.
debug,
"main",
1703 "analyze contiguouslist and create vector of gaps");
1708 contiguouslist, opt.
debug);
1719 printgaps(os, vecofgaps, earliesttime, latesttime,
1724 std::cout << std::endl;
1725 printgaps(std::cout, vecofgaps, earliesttime, latesttime,
1737 datrw::abort_if_exists(gnuplotfile);
1739 std::ofstream ofs(gnuplotfile.c_str());
1740 libtime::TRelativeTime binsize(opt.
binsize);
1749 TFXX_debug(opt.
debug,
"main",
1750 "write gap and/or completeness series");
1751 std::ofstream* Pgofs;
1752 std::ofstream* Pcofs;
1753 datrw::oanystream* Pgos;
1754 datrw::oanystream* Pcos;
1775 libtime::TRelativeTime binsize(opt.
binsize);
1776 TFXX_debug(opt.
debug,
"main",
1777 "earliesttime: " << earliesttime.timestring()
1778 <<
" latesttime: " << latesttime.timestring());
1780 for (Tvecofgaps::const_iterator S=vecofgaps.begin();
1781 S!=vecofgaps.end(); ++S)
1795 TFXX_debug(opt.
debug,
"main",
1796 "just wrote completeness time series with " 1808 TFXX_debug(opt.
debug,
"main",
1809 "close completeness datstream");
1811 TFXX_debug(opt.
debug,
"main",
1812 "close completeness file stream");
1840 cout <<
"step 4: copy data" << endl;
1842 { cout <<
"extract integer samples" << endl; }
1844 { cout <<
"write SFF with GSE compatible scaling (ampfac)" << endl; }
1848 if (opt.
verbose) { cout <<
"open output file " << outfile << endl; }
1850 if (!opt.
overwrite) { datrw::abort_if_exists(outfile); }
1851 std::ofstream ofs(outfile.c_str(),
1855 filefree.append(
"index file(s):");
1856 infile=infiles.begin();
1857 while( infile!=infiles.end()) { filefree.append(*infile); ++infile; }
1858 if ((!opt.
nofreeblock) && os.handlesfilefree()) { os << filefree; }
1859 if (opt.
debug) { cout <<
"DEBUG: file FREE block:" << endl << filefree; }
1862 datrw::sequentialtracereader is(opt.
debug);
1864 for (
unsigned int itrace=0; itrace<trace.size(); ++itrace)
1866 if (opt.
verbose) { cout <<
"compile trace #" << itrace+1 <<
":" << endl; }
1870 for (
unsigned int iblock=0; iblock<thetrace.size(); ++iblock)
1872 totalsamples+=thetrace[iblock].nsamples;
1875 { cout <<
" extract " << totalsamples <<
" samples" << endl; }
1876 sff::FREE tracefree;
1877 sff::INFO traceinfo;
1879 sff::WID2 tracewid2;
1885 TFXX_debug(opt.
debug,
"anyextract",
1886 "allocate integer data series");
1888 tracefree.append(
"copy integer data");
1892 TFXX_debug(opt.
debug,
"anyextract",
1893 "allocate float data series");
1895 tracefree.append(
"copy float (single precision) data");
1899 TFXX_debug(opt.
debug,
"anyextract",
1900 "allocate double data series");
1902 tracefree.append(
"copy double precision data");
1904 TFXX_debug(opt.
debug,
"anyextract",
1905 "output series container is allocated");
1908 tracewid2=collection[thetrace[0].ientry].wid2;
1909 tracewid2.nsamples=totalsamples;
1910 tracewid2.date=sff::wid2isample(tracewid2,thetrace[0].ifirst);
1912 tracefree.append(
"read data from:");
1913 for (
unsigned int iblock=0; iblock<thetrace.size(); ++iblock)
1917 std::ostringstream freeline;
1919 " trace #" << theentry.
itrace <<
1921 tracefree.append(freeline.str());
1926 cout <<
" open input file " << theentry.
filename 1928 <<
"trace: " << theentry.
itrace <<
")" 1934 TFXX_assert(is.good(),
"ERROR: input is not good!");
1937 TFXX_assert(is.providesi(),
1938 "ERROR: integer data requested\n" 1939 "but not provided by input stream");
1940 is >> inseries.
is();
1944 TFXX_assert(is.providesf(),
1945 "ERROR: float data requested\n" 1946 "but not provided by input stream");
1947 is >> inseries.
fs();
1951 TFXX_assert(is.providesd(),
1952 "ERROR: double precision data requested\n" 1953 "but not provided by input stream");
1954 is >> inseries.
ds();
1957 <=
int(inseries.
last())),
1958 "ERROR: too few samples in input data!");
1959 TFXX_assert((theblock.
ifirst >=
int(inseries.
first())),
1960 "ERROR: range error in input data!");
1962 <=
int(tracedata.
last())),
1963 "ERROR: too few samples reserved in output data!");
1964 TFXX_assert((theblock.
ofirst >=
int(tracedata.
first())),
1965 "ERROR: range error in output data!");
1966 for (
int isample=0; isample<theblock.
nsamples; ++isample)
1968 int ioutsample=isample+theblock.
ofirst;
1969 int iinsample=isample+theblock.
ifirst;
1972 TFXX_assert(((ioutsample>=tracedata.
first())
1973 &&(ioutsample<=tracedata.
last())
1974 &&(iinsample>=inseries.
first())
1975 &&(iinsample<=inseries.
last())),
1976 "ERROR: series index out of range!");
1979 { tracedata.
is()(ioutsample)=inseries.
is()(iinsample); }
1981 { tracedata.
fs()(ioutsample)=inseries.
fs()(iinsample); }
1983 { tracedata.
ds()(ioutsample)=inseries.
ds()(iinsample); }
1986 if ((iblock == 0) && (is.hasinfo()))
1987 { is >> traceinfo; hasinfo=
true; }
1993 if (opt.
verbose) { cout <<
" write output trace" << endl; }
1995 if ((!opt.
nofreeblock) && os.handlestracefree()) { os << tracefree; }
1996 if (hasinfo && os.handlesinfo()) { os << traceinfo; }
1999 os << tracedata.
is();
2003 os << tracedata.
fs();
2007 os << tracedata.
ds();
libtime::TRelativeTime dt
Tvecofgaps gaps(const libtime::TAbsoluteTime &earliest, const libtime::TAbsoluteTime &latest, const TContiguouslist &cl, const bool &debug)
a class to hold a gap series for one stream
struct to hold regular expressions.
std::string gapoutputfile
structs used in primary gap analysis (prototypes)
class to define completenessbins (prototypes)
struct to hold index values defining a data block within a trace.
GapSeries seriesofmissingsamples(const Gapsofstream &gos, const CompletenessBins &cb, const bool &debug)
Function to extract a GapSeries from given Gapsofstream.
class to define the time axis of the completeness time series.
class to handle two different flavours of data
bool dumpintermediateresults
CompareCollection(const Tvecofindexentries &collection)
const Tvecofindexentries & Mcollection
struct to hold an index file entry.
bool allowduplicatesamples
std::string completenessseriesfile
libtime::TAbsoluteTime first
A comparison function class for the collection.
tfxx::string::regexx instrument
bool breakonduplicatesamples
libtime::TAbsoluteTime first
CompletenessSeries completeness(const GapSeries &gs, const bool &debug)
convert gaps to completeness
tfxx::string::regexx channel
Indicate a contiguous set of data.
std::string selinstrument
std::string gapseriesfile
tfxx::string::regexx auxid
unsigned int summarizelevel
a class to hold a completeness series for one stream
tfxx::string::regexx station
std::list< Contiguous > TContiguouslist
A list to store information on all sequences of contiguous data.
std::vector< Gapsofstream > Tvecofgaps
vector to hold all gaps
void gnuplotplot(std::ostream &os, const std::string &psname, const CompletenessBins &cb, const Tvecofgaps &vog)
std::vector< std::string > Tvecofstrings
bool operator()(const int &i1, const int &i2)
libtime::TAbsoluteTime last
void printgaps(std::ostream &os, const Tvecofgaps &vog, const libtime::TAbsoluteTime &earliest, const libtime::TAbsoluteTime &latest, const unsigned int &lev)
all structs use to produce series of gaps and series of completeness (prototypes) ...
function prototypes used in gap analysis (prototypes)
libtime::TAbsoluteTime last
Tcompletenessseries completeness